On Wed, Aug 17, 2016 at 09:13:04PM +1000, David Gwynne wrote:
> On Wed, Aug 17, 2016 at 08:27:19PM +1000, David Gwynne wrote:
> > On Thu, Aug 11, 2016 at 02:43:16PM +1000, David Gwynne wrote:
> > > ive been tinkering with per cpu memory in the kernel.
> > 
> > mpi@ wanted to see another example of moving something to per cpu counters 
> > and challenged me to do tcpstat.
> > 
> > the diff below does that, but with a couple of warts.
> > 
> > firstly, it still exports a struct tcpstat to userland. it does
> > this by having an enum that shadows the entries in tcpstat which
> > is used as indexes in the array of counters. mapping the counter
> > array into tcpstat for export is ugly boilerplate.
> > 
> > secondly, some tcp syn cache stats dont map well to per cpu counters.
> > to cope with that ive split the syn cache counters out into their
> > own struct. i personally would like to replace the syn cache hash
> > with a red black tree, which would happily solve that problem by
> > accident.
> > 
> > could someone test and see if this has an impact on tcp speed?
> > 
> > dlg
> 
> the last diff was borked. this one should be cleaner.

and again, cos i missed proctors cksum changes.

sigh sigh

Index: conf/files
===================================================================
RCS file: /cvs/src/sys/conf/files,v
retrieving revision 1.624
diff -u -p -r1.624 files
--- conf/files  13 Aug 2016 20:35:57 -0000      1.624
+++ conf/files  17 Aug 2016 11:51:54 -0000
@@ -692,6 +692,7 @@ file kern/subr_evcount.c
 file kern/subr_extent.c
 file kern/subr_hibernate.c             hibernate
 file kern/subr_log.c
+file kern/subr_percpu.c
 file kern/subr_poison.c                        diagnostic
 file kern/subr_pool.c
 file kern/dma_alloc.c
Index: kern/init_main.c
===================================================================
RCS file: /cvs/src/sys/kern/init_main.c,v
retrieving revision 1.253
diff -u -p -r1.253 init_main.c
--- kern/init_main.c    17 May 2016 23:28:03 -0000      1.253
+++ kern/init_main.c    17 Aug 2016 11:51:54 -0000
@@ -143,6 +143,7 @@ void        init_exec(void);
 void   kqueue_init(void);
 void   taskq_init(void);
 void   pool_gc_pages(void *);
+void   percpu_init(void);
 
 extern char sigcode[], esigcode[], sigcoderet[];
 #ifdef SYSCALL_DEBUG
@@ -354,6 +355,9 @@ main(void *framep)
        /* Configure virtual memory system, set vm rlimits. */
        uvm_init_limits(p);
 
+       /* Per CPU memory allocation */
+       percpu_init();
+
        /* Initialize the file systems. */
 #if defined(NFSSERVER) || defined(NFSCLIENT)
        nfs_init();                     /* initialize server/shared data */
Index: kern/subr_percpu.c
===================================================================
RCS file: kern/subr_percpu.c
diff -N kern/subr_percpu.c
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ kern/subr_percpu.c  17 Aug 2016 11:51:54 -0000
@@ -0,0 +1,326 @@
+/*     $OpenBSD$ */
+
+/*
+ * Copyright (c) 2016 David Gwynne <[email protected]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/pool.h>
+#include <sys/malloc.h>
+#include <sys/types.h>
+
+#include <sys/percpu.h>
+
+#ifdef MULTIPROCESSOR
+struct pool cpumem_pl;
+
+void
+percpu_init(void)
+{
+       pool_init(&cpumem_pl, sizeof(struct cpumem) * ncpus, 0, 0,
+           PR_WAITOK, "percpumem", &pool_allocator_single);
+       pool_setipl(&cpumem_pl, IPL_NONE);
+}
+
+struct cpumem *
+cpumem_get(struct pool *pp)
+{
+       struct cpumem *cm;
+       unsigned int cpu;
+
+       cm = pool_get(&cpumem_pl, PR_WAITOK);
+
+       for (cpu = 0; cpu < ncpus; cpu++)
+               cm[cpu].mem = pool_get(pp, PR_WAITOK | PR_ZERO);
+
+       return (cm);
+}
+
+void
+cpumem_put(struct pool *pp, struct cpumem *cm)
+{
+       unsigned int cpu;
+
+       for (cpu = 0; cpu < ncpus; cpu++)
+               pool_put(pp, cm[cpu].mem);
+
+       pool_put(&cpumem_pl, cm);
+}
+
+struct cpumem *
+cpumem_malloc(size_t sz, int type)
+{
+       struct cpumem *cm;
+       unsigned int cpu;
+
+       sz = roundup(sz, CACHELINESIZE);
+
+       cm = pool_get(&cpumem_pl, PR_WAITOK);
+
+       for (cpu = 0; cpu < ncpus; cpu++)
+               cm[cpu].mem = malloc(sz, type, M_WAITOK | M_ZERO);
+
+       return (cm);
+}
+
+struct cpumem *
+cpumem_realloc(struct cpumem *bootcm, size_t sz, int type)
+{
+       struct cpumem *cm;
+       unsigned int cpu;
+
+       sz = roundup(sz, CACHELINESIZE);
+
+       cm = pool_get(&cpumem_pl, PR_WAITOK);
+
+       cm[0].mem = bootcm[0].mem;
+       for (cpu = 1; cpu < ncpus; cpu++)
+               cm[cpu].mem = malloc(sz, type, M_WAITOK | M_ZERO);
+
+       return (cm);
+}
+
+void
+cpumem_free(struct cpumem *cm, int type, size_t sz)
+{
+       unsigned int cpu;
+
+       sz = roundup(sz, CACHELINESIZE);
+
+       for (cpu = 0; cpu < ncpus; cpu++)
+               free(cm[cpu].mem, type, sz);
+
+       pool_put(&cpumem_pl, cm);
+}
+
+void *
+cpumem_first(struct cpumem_iter *i, struct cpumem *cm)
+{
+       i->cpu = 0;
+
+       return (cm[0].mem);
+}
+
+void *
+cpumem_next(struct cpumem_iter *i, struct cpumem *cm)
+{
+       unsigned int cpu = ++i->cpu;
+
+       if (cpu >= ncpus)
+               return (NULL);
+
+       return (cm[cpu].mem);
+}
+
+struct cpumem *
+counters_alloc(unsigned int n, int type)
+{
+       struct cpumem *cm;
+       struct cpumem_iter cmi;
+       uint64_t *counters;
+       unsigned int i;
+
+       KASSERT(n > 0);
+
+       n++; /* add space for a generation number */
+       cm = cpumem_malloc(n * sizeof(uint64_t), type);
+
+       CPUMEM_FOREACH(counters, &cmi, cm) {
+               for (i = 0; i < n; i++)
+                       counters[i] = 0;
+       }
+
+       return (cm);
+}
+
+struct cpumem *
+counters_realloc(struct cpumem *cm, unsigned int n, int type)
+{
+       n++; /* the generation number */
+       return (cpumem_realloc(cm, n * sizeof(uint64_t), type));
+}
+
+void
+counters_free(struct cpumem *cm, int type, unsigned int n)
+{
+       n++; /* generation number */
+       cpumem_free(cm, type, n * sizeof(uint64_t));
+}
+
+void
+counters_read(struct cpumem *cm, uint64_t *output, unsigned int n)
+{
+       struct cpumem_iter cmi;
+       uint64_t *gen, *counters, *temp;
+       uint64_t enter, leave;
+       unsigned int i;
+
+       for (i = 0; i < n; i++)
+               output[i] = 0;
+
+       temp = mallocarray(n, sizeof(uint64_t), M_TEMP, M_WAITOK);
+
+       gen = cpumem_first(&cmi, cm);
+       do {
+               counters = gen + 1;
+
+               enter = *gen;
+               for (;;) {
+                       /* the generation number is odd during an update */
+                       while (enter & 1) {
+                               yield();
+                               membar_consumer();
+                               enter = *gen;
+                       }
+
+                       for (i = 0; i < n; i++)
+                               temp[i] = counters[i];
+
+                       membar_consumer();
+                       leave = *gen;
+
+                       if (enter == leave)
+                               break;
+
+                       enter = leave;
+               }
+
+               for (i = 0; i < n; i++)
+                       output[i] += temp[i];
+
+               gen = cpumem_next(&cmi, cm);
+       } while (gen != NULL);
+
+       free(temp, M_TEMP, n * sizeof(uint64_t));
+}
+
+void
+counters_zero(struct cpumem *cm, unsigned int n)
+{
+       struct cpumem_iter cmi;
+       uint64_t *counters;
+       unsigned int i;
+
+       n++; /* zero the generation numbers too */
+
+       counters = cpumem_first(&cmi, cm);
+       do {
+               for (i = 0; i < n; i++)
+                       counters[i] = 0;
+
+               counters = cpumem_next(&cmi, cm);
+       } while (counters != NULL);
+}
+
+#else /* MULTIPROCESSOR */
+
+/*
+ * Uniprocessor implementation of per-CPU data structures.
+ *
+ * UP percpu memory is a single memory allocation cast to/from the
+ * cpumem struct. It is not scaled up to the size of cacheline because
+ * there's no other cache to contend with.
+ */
+
+void
+percpu_init(void)
+{
+       /* nop */
+}
+
+struct cpumem *
+cpumem_get(struct pool *pp)
+{
+       return (pool_get(pp, PR_WAITOK));
+}
+
+void
+cpumem_put(struct pool *pp, struct cpumem *cm)
+{
+       pool_put(pp, cm);
+}
+
+struct cpumem *
+cpumem_malloc(size_t sz, int type)
+{
+       return (malloc(sz, type, M_WAITOK));
+}
+
+struct cpumem *
+cpumem_realloc(struct cpumem *cm, size_t sz, int type)
+{
+       return (cm);
+}
+
+void
+cpumem_free(struct cpumem *cm, int type, size_t sz)
+{
+       free(cm, type, sz);
+}
+
+struct cpumem *
+counters_alloc(unsigned int n, int type)
+{
+       KASSERT(n > 0);
+
+       return (cpumem_malloc(n * sizeof(uint64_t), type));
+}
+
+struct cpumem *
+counters_realloc(struct cpumem *cm, unsigned int n, int type)
+{
+       /* this is unnecessary, but symmetrical */
+       return (cpumem_realloc(cm, n * sizeof(uint64_t), type));
+}
+
+void
+counters_free(struct cpumem *cm, int type, unsigned int n)
+{
+       cpumem_free(cm, type, n * sizeof(uint64_t));
+}
+
+void
+counters_read(struct cpumem *cm, uint64_t *output, unsigned int n)
+{
+       uint64_t *counters;
+       unsigned int i;
+       int s;
+
+       counters = (uint64_t *)cm;
+
+       s = splhigh();
+       for (i = 0; i < n; i++)
+               output[i] = counters[i];
+       splx(s);
+}
+
+void
+counters_zero(struct cpumem *cm, unsigned int n)
+{
+       uint64_t *counters;
+       unsigned int i;
+       int s;
+
+       counters = (uint64_t *)cm;
+
+       s = splhigh();
+       for (i = 0; i < n; i++)
+               counters[i] = 0;
+       splx(s);
+}
+
+#endif /* MULTIPROCESSOR */
+
Index: net/pf.c
===================================================================
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.980
diff -u -p -r1.980 pf.c
--- net/pf.c    17 Aug 2016 03:24:11 -0000      1.980
+++ net/pf.c    17 Aug 2016 11:51:55 -0000
@@ -6104,7 +6104,7 @@ pf_check_tcp_cksum(struct mbuf *m, int o
        }
 
        /* need to do it in software */
-       tcpstat.tcps_inswcsum++;
+       tcpc_inc(tcpc_inswcsum);
        
        switch (af) {
        case AF_INET:
@@ -6125,7 +6125,7 @@ pf_check_tcp_cksum(struct mbuf *m, int o
                unhandled_af(af);
        }
        if (sum) {
-               tcpstat.tcps_rcvbadsum++;
+               tcpc_inc(tcpc_rcvbadsum);
                m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_BAD;
                return (1);
        }
Index: netinet/ip_output.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_output.c,v
retrieving revision 1.326
diff -u -p -r1.326 ip_output.c
--- netinet/ip_output.c 15 Aug 2016 11:35:25 -0000      1.326
+++ netinet/ip_output.c 17 Aug 2016 11:51:55 -0000
@@ -1800,7 +1800,7 @@ in_proto_cksum_out(struct mbuf *m, struc
        if (m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) {
                if (!ifp || !(ifp->if_capabilities & IFCAP_CSUM_TCPv4) ||
                    ip->ip_hl != 5 || ifp->if_bridgeport != NULL) {
-                       tcpstat.tcps_outswcsum++;
+                       tcpc_inc(tcpc_outswcsum);
                        in_delayed_cksum(m);
                        m->m_pkthdr.csum_flags &= ~M_TCP_CSUM_OUT; /* Clear */
                }
Index: netinet/tcp_input.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_input.c,v
retrieving revision 1.325
diff -u -p -r1.325 tcp_input.c
--- netinet/tcp_input.c 20 Jul 2016 09:15:28 -0000      1.325
+++ netinet/tcp_input.c 17 Aug 2016 11:51:55 -0000
@@ -220,7 +220,7 @@ tcp_reass(struct tcpcb *tp, struct tcphd
                if (tiqe == NULL || th->th_seq != tp->rcv_nxt) {
                        /* Flush segment queue for this connection */
                        tcp_freeq(tp);
-                       tcpstat.tcps_rcvmemdrop++;
+                       tcpc_inc(tcpc_rcvmemdrop);
                        m_freem(m);
                        return (0);
                }
@@ -247,8 +247,8 @@ tcp_reass(struct tcpcb *tp, struct tcphd
                i = phdr->th_seq + phdr->th_reseqlen - th->th_seq;
                if (i > 0) {
                        if (i >= *tlen) {
-                               tcpstat.tcps_rcvduppack++;
-                               tcpstat.tcps_rcvdupbyte += *tlen;
+                               tcpc_pkt(tcpc_rcvduppack,
+                                   tcpc_rcvdupbyte, *tlen);
                                m_freem(m);
                                pool_put(&tcpqe_pool, tiqe);
                                return (0);
@@ -258,8 +258,7 @@ tcp_reass(struct tcpcb *tp, struct tcphd
                        th->th_seq += i;
                }
        }
-       tcpstat.tcps_rcvoopack++;
-       tcpstat.tcps_rcvoobyte += *tlen;
+       tcpc_pkt(tcpc_rcvoopack, tcpc_rcvoobyte, *tlen);
 
        /*
         * While we overlap succeeding segments trim them or,
@@ -372,6 +371,8 @@ tcp_input(struct mbuf *m, ...)
        int iphlen;
        va_list ap;
        struct tcphdr *th;
+       struct counters_ref r;
+       uint64_t *tcpc;
 #ifdef INET6
        struct ip6_hdr *ip6 = NULL;
 #endif /* INET6 */
@@ -390,7 +391,7 @@ tcp_input(struct mbuf *m, ...)
        iphlen = va_arg(ap, int);
        va_end(ap);
 
-       tcpstat.tcps_rcvtotal++;
+       tcpc_inc(tcpc_rcvtotal);
 
        opti.ts_present = 0;
        opti.maxseg = 0;
@@ -449,7 +450,7 @@ tcp_input(struct mbuf *m, ...)
 
        IP6_EXTHDR_GET(th, struct tcphdr *, m, iphlen, sizeof(*th));
        if (!th) {
-               tcpstat.tcps_rcvshort++;
+               tcpc_inc(tcpc_rcvshort);
                return;
        }
 
@@ -509,10 +510,10 @@ tcp_input(struct mbuf *m, ...)
                int sum;
 
                if (m->m_pkthdr.csum_flags & M_TCP_CSUM_IN_BAD) {
-                       tcpstat.tcps_rcvbadsum++;
+                       tcpc_inc(tcpc_rcvbadsum);
                        goto drop;
                }
-               tcpstat.tcps_inswcsum++;
+               tcpc_inc(tcpc_inswcsum);
                switch (af) {
                case AF_INET:
                        sum = in4_cksum(m, IPPROTO_TCP, iphlen, tlen);
@@ -525,7 +526,7 @@ tcp_input(struct mbuf *m, ...)
 #endif
                }
                if (sum != 0) {
-                       tcpstat.tcps_rcvbadsum++;
+                       tcpc_inc(tcpc_rcvbadsum);
                        goto drop;
                }
        }
@@ -536,14 +537,14 @@ tcp_input(struct mbuf *m, ...)
         */
        off = th->th_off << 2;
        if (off < sizeof(struct tcphdr) || off > tlen) {
-               tcpstat.tcps_rcvbadoff++;
+               tcpc_inc(tcpc_rcvbadoff);
                goto drop;
        }
        tlen -= off;
        if (off > sizeof(struct tcphdr)) {
                IP6_EXTHDR_GET(th, struct tcphdr *, m, iphlen, off);
                if (!th) {
-                       tcpstat.tcps_rcvshort++;
+                       tcpc_inc(tcpc_rcvshort);
                        return;
                }
                optlen = off - sizeof(struct tcphdr);
@@ -603,7 +604,7 @@ findpcb:
                int     inpl_reverse = 0;
                if (m->m_pkthdr.pf.flags & PF_TAG_TRANSLATE_LOCALHOST)
                        inpl_reverse = 1;
-               ++tcpstat.tcps_pcbhashmiss;
+               tcpc_inc(tcpc_pcbhashmiss);
                switch (af) {
 #ifdef INET6
                case AF_INET6:
@@ -625,7 +626,7 @@ findpcb:
                 * but should either do a listen or a connect soon.
                 */
                if (inp == NULL) {
-                       ++tcpstat.tcps_noport;
+                       tcpc_inc(tcpc_noport);
                        goto dropwithreset_ratelim;
                }
        }
@@ -842,14 +843,14 @@ findpcb:
                                        case AF_INET6:
                                                if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_src,
                                                    &ip6->ip6_dst)) {
-                                                       tcpstat.tcps_badsyn++;
+                                                       tcpc_inc(tcpc_badsyn);
                                                        goto drop;
                                                }
                                                break;
 #endif /* INET6 */
                                        case AF_INET:
                                                if (ip->ip_dst.s_addr == 
ip->ip_src.s_addr) {
-                                                       tcpstat.tcps_badsyn++;
+                                                       tcpc_inc(tcpc_badsyn);
                                                        goto drop;
                                                }
                                                break;
@@ -863,7 +864,7 @@ findpcb:
                                if (so->so_qlen > so->so_qlimit ||
                                    syn_cache_add(&src.sa, &dst.sa, th, iphlen,
                                    so, m, optp, optlen, &opti, reuse) == -1) {
-                                       tcpstat.tcps_dropsyn++;
+                                       tcpc_inc(tcpc_dropsyn);
                                        goto drop;
                                }
                                return;
@@ -896,7 +897,7 @@ findpcb:
        ipsp_spd_lookup(m, af, iphlen, &error, IPSP_DIRECTION_IN,
            tdb, inp, 0);
        if (error) {
-               tcpstat.tcps_rcvnosec++;
+               tcpc_inc(tcpc_rcvnosec);
                goto drop;
        }
 #endif /* IPSEC */
@@ -942,7 +943,7 @@ findpcb:
        /* if congestion experienced, set ECE bit in subsequent packets. */
        if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) {
                tp->t_flags |= TF_RCVD_CE;
-               tcpstat.tcps_ecn_rcvce++;
+               tcpc_inc(tcpc_ecn_rcvce);
        }
 #endif
        /*
@@ -988,7 +989,6 @@ findpcb:
                                /*
                                 * this is a pure ack for outstanding data.
                                 */
-                               ++tcpstat.tcps_predack;
                                if (opti.ts_present && opti.ts_ecr)
                                        tcp_xmit_timer(tp, tcp_now - opti.ts_ecr);
                                else if (tp->t_rtttime &&
@@ -996,8 +996,11 @@ findpcb:
                                        tcp_xmit_timer(tp,
                                            tcp_now - tp->t_rtttime);
                                acked = th->th_ack - tp->snd_una;
-                               tcpstat.tcps_rcvackpack++;
-                               tcpstat.tcps_rcvackbyte += acked;
+                               tcpc = counters_enter(&r, tcpcounters);
+                               tcpc[tcpc_predack]++;
+                               tcpc[tcpc_rcvackpack]++;
+                               tcpc[tcpc_rcvackbyte] += acked;
+                               counters_leave(&r, tcpcounters);
                                ND6_HINT(tp);
                                sbdrop(&so->so_snd, acked);
 
@@ -1074,10 +1077,12 @@ findpcb:
                        if (tp->sack_enable && tp->rcv_numsacks)
                                tcp_clean_sackreport(tp);
 #endif /* TCP_SACK */
-                       ++tcpstat.tcps_preddat;
                        tp->rcv_nxt += tlen;
-                       tcpstat.tcps_rcvpack++;
-                       tcpstat.tcps_rcvbyte += tlen;
+                       tcpc = counters_enter(&r, tcpcounters);
+                       tcpc[tcpc_preddat]++;
+                       tcpc[tcpc_rcvpack]++;
+                       tcpc[tcpc_rcvbyte] += tlen;
+                       counters_leave(&r, tcpcounters);
                        ND6_HINT(tp);
 
                        TCP_SETUP_ACK(tp, tiflags, m);
@@ -1144,7 +1149,7 @@ findpcb:
        case TCPS_SYN_RECEIVED:
                if (tiflags & TH_ACK) {
                        if (tiflags & TH_SYN) {
-                               tcpstat.tcps_badsyn++;
+                               tcpc_inc(tcpc_badsyn);
                                goto dropwithreset;
                        }
                        if (SEQ_LEQ(th->th_ack, tp->snd_una) ||
@@ -1216,13 +1221,13 @@ findpcb:
                        case TH_ECE|TH_CWR:
                                tp->t_flags |= TF_ECN_PERMIT;
                                tiflags &= ~(TH_ECE|TH_CWR);
-                               tcpstat.tcps_ecn_accepts++;
+                               tcpc_inc(tcpc_ecn_accepts);
                        }
                }
 #endif
 
                if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) {
-                       tcpstat.tcps_connects++;
+                       tcpc_inc(tcpc_connects);
                        soisconnected(so);
                        tp->t_state = TCPS_ESTABLISHED;
                        TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
@@ -1266,8 +1271,8 @@ trimthenstep6:
                        m_adj(m, -todrop);
                        tlen = tp->rcv_wnd;
                        tiflags &= ~TH_FIN;
-                       tcpstat.tcps_rcvpackafterwin++;
-                       tcpstat.tcps_rcvbyteafterwin += todrop;
+                       tcpc_pkt(tcpc_rcvpackafterwin,
+                           tcpc_rcvbyteafterwin, todrop);
                }
                tp->snd_wl1 = th->th_seq - 1;
                tp->rcv_up = th->th_seq;
@@ -1333,9 +1338,11 @@ trimthenstep6:
                         */
                        tp->ts_recent = 0;
                } else {
-                       tcpstat.tcps_rcvduppack++;
-                       tcpstat.tcps_rcvdupbyte += tlen;
-                       tcpstat.tcps_pawsdrop++;
+                       tcpc = counters_enter(&r, tcpcounters);
+                       tcpc[tcpc_pawsdrop]++;
+                       tcpc[tcpc_rcvduppack]++;
+                       tcpc[tcpc_rcvdupbyte] += tlen;
+                       counters_leave(&r, tcpcounters);
                        goto dropafterack;
                }
        }
@@ -1364,11 +1371,12 @@ trimthenstep6:
                         * but keep on processing for RST or ACK.
                         */
                        tp->t_flags |= TF_ACKNOW;
-                       tcpstat.tcps_rcvdupbyte += todrop = tlen;
-                       tcpstat.tcps_rcvduppack++;
+                       todrop = tlen;
+                       tcpc_pkt(tcpc_rcvduppack,
+                           tcpc_rcvdupbyte, todrop);
                } else {
-                       tcpstat.tcps_rcvpartduppack++;
-                       tcpstat.tcps_rcvpartdupbyte += todrop;
+                       tcpc_pkt(tcpc_rcvpartduppack,
+                           tcpc_rcvpartdupbyte, todrop);
                }
                hdroptlen += todrop;    /* drop from head afterwards */
                th->th_seq += todrop;
@@ -1388,7 +1396,7 @@ trimthenstep6:
        if ((so->so_state & SS_NOFDREF) &&
            tp->t_state > TCPS_CLOSE_WAIT && tlen) {
                tp = tcp_close(tp);
-               tcpstat.tcps_rcvafterclose++;
+               tcpc_inc(tcpc_rcvafterclose);
                goto dropwithreset;
        }
 
@@ -1398,9 +1406,10 @@ trimthenstep6:
         */
        todrop = (th->th_seq + tlen) - (tp->rcv_nxt+tp->rcv_wnd);
        if (todrop > 0) {
-               tcpstat.tcps_rcvpackafterwin++;
+               tcpc = counters_enter(&r, tcpcounters);
+               tcpc[tcpc_rcvpackafterwin]++;
                if (todrop >= tlen) {
-                       tcpstat.tcps_rcvbyteafterwin += tlen;
+                       tcpc[tcpc_rcvbyteafterwin] += tlen;
                        /*
                         * If window is closed can only take segments at
                         * window edge, and have to drop data and PUSH from
@@ -1410,11 +1419,14 @@ trimthenstep6:
                         */
                        if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) {
                                tp->t_flags |= TF_ACKNOW;
-                               tcpstat.tcps_rcvwinprobe++;
-                       } else
+                               tcpc[tcpc_rcvwinprobe]++;
+                       } else {
+                               counters_leave(&r, tcpcounters);
                                goto dropafterack;
+                       }
                } else
-                       tcpstat.tcps_rcvbyteafterwin += todrop;
+                       tcpc[tcpc_rcvbyteafterwin] += todrop;
+               counters_leave(&r, tcpcounters);
                m_adj(m, -todrop);
                tlen -= todrop;
                tiflags &= ~(TH_PUSH|TH_FIN);
@@ -1468,7 +1480,7 @@ trimthenstep6:
                        so->so_error = ECONNRESET;
                close:
                        tp->t_state = TCPS_CLOSED;
-                       tcpstat.tcps_drops++;
+                       tcpc_inc(tcpc_drops);
                        tp = tcp_close(tp);
                        goto drop;
                case TCPS_CLOSING:
@@ -1507,7 +1519,7 @@ trimthenstep6:
         * The ACK was checked above.
         */
        case TCPS_SYN_RECEIVED:
-               tcpstat.tcps_connects++;
+               tcpc_inc(tcpc_connects);
                soisconnected(so);
                tp->t_state = TCPS_ESTABLISHED;
                TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
@@ -1555,10 +1567,10 @@ trimthenstep6:
                                        tp->snd_cwnd = tp->snd_ssthresh;
                                        tp->snd_last = tp->snd_max;
                                        tp->t_flags |= TF_SEND_CWR;
-                                       tcpstat.tcps_cwr_ecn++;
+                                       tcpc_inc(tcpc_cwr_ecn);
                                }
                        }
-                       tcpstat.tcps_ecn_rcvece++;
+                       tcpc_inc(tcpc_ecn_rcvece);
                }
                /*
                 * if we receive CWR, we know that the peer has reduced
@@ -1566,7 +1578,7 @@ trimthenstep6:
                 */
                if ((tiflags & TH_CWR)) {
                        tp->t_flags &= ~TF_RCVD_CE;
-                       tcpstat.tcps_ecn_rcvcwr++;
+                       tcpc_inc(tcpc_ecn_rcvcwr);
                }
 #endif /* TCP_ECN */
 
@@ -1588,7 +1600,7 @@ trimthenstep6:
                                if (th->th_seq != tp->rcv_nxt &&
                                   SEQ_LT(th->th_ack,
                                   tp->snd_una - tp->max_sndwnd)) {
-                                       tcpstat.tcps_rcvacktooold++;
+                                       tcpc_inc(tcpc_rcvacktooold);
                                        goto drop;
                                }
                                break;
@@ -1604,7 +1616,7 @@ trimthenstep6:
                                break;
                        }
                        if (tiwin == tp->snd_wnd) {
-                               tcpstat.tcps_rcvdupack++;
+                               tcpc_inc(tcpc_rcvdupack);
                                /*
                                 * If we have outstanding data (other than
                                 * a window probe), this is a completely
@@ -1669,8 +1681,12 @@ trimthenstep6:
 #ifdef TCP_ECN
                                                tp->t_flags |= TF_SEND_CWR;
 #endif
-                                               tcpstat.tcps_cwr_frecovery++;
-                                               tcpstat.tcps_sack_recovery_episode++;
+                                               tcpc = counters_enter(&r,
+                                                   tcpcounters);
+                                               tcpc[tcpc_cwr_frecovery]++;
+                                               tcpc[tcpc_sack_recovery_episode]++;
+                                               counters_leave(&r,
+                                                   tcpcounters);
 #if defined(TCP_SACK) && defined(TCP_FACK)
                                                tp->t_dupacks = tcprexmtthresh;
                                                (void) tcp_output(tp);
@@ -1698,8 +1714,10 @@ trimthenstep6:
 #ifdef TCP_ECN
                                        tp->t_flags |= TF_SEND_CWR;
 #endif
-                                       tcpstat.tcps_cwr_frecovery++;
-                                       tcpstat.tcps_sndrexmitfast++;
+                                       tcpc = counters_enter(&r, tcpcounters);
+                                       tcpc[tcpc_cwr_frecovery]++;
+                                       tcpc[tcpc_sndrexmitfast]++;
+                                       counters_leave(&r, tcpcounters);
                                        (void) tcp_output(tp);
 
                                        tp->snd_cwnd = tp->snd_ssthresh +
@@ -1788,12 +1806,11 @@ trimthenstep6:
                tp->t_dupacks = 0;
 #endif
                if (SEQ_GT(th->th_ack, tp->snd_max)) {
-                       tcpstat.tcps_rcvacktoomuch++;
+                       tcpc_inc(tcpc_rcvacktoomuch);
                        goto dropafterack_ratelim;
                }
                acked = th->th_ack - tp->snd_una;
-               tcpstat.tcps_rcvackpack++;
-               tcpstat.tcps_rcvackbyte += acked;
+               tcpc_pkt(tcpc_rcvackpack, tcpc_rcvackbyte, acked);
 
                /*
                 * If we have a timestamp reply, update smoothed
@@ -1966,7 +1983,7 @@ step6:
                /* keep track of pure window updates */
                if (tlen == 0 &&
                    tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd)
-                       tcpstat.tcps_rcvwinupd++;
+                       tcpc_inc(tcpc_rcvwinupd);
                tp->snd_wnd = tiwin;
                tp->snd_wl1 = th->th_seq;
                tp->snd_wl2 = th->th_ack;
@@ -2052,8 +2069,7 @@ dodata:                                                   /* XXX */
                        TCP_SETUP_ACK(tp, tiflags, m);
                        tp->rcv_nxt += tlen;
                        tiflags = th->th_flags & TH_FIN;
-                       tcpstat.tcps_rcvpack++;
-                       tcpstat.tcps_rcvbyte += tlen;
+                       tcpc_pkt(tcpc_rcvpack, tcpc_rcvbyte, tlen);
                        ND6_HINT(tp);
                        if (so->so_state & SS_CANTRCVMORE)
                                m_freem(m);
@@ -2165,7 +2181,7 @@ badsyn:
        /*
         * Received a bad SYN.  Increment counters and dropwithreset.
         */
-       tcpstat.tcps_badsyn++;
+       tcpc_inc(tcpc_badsyn);
        tp = NULL;
        goto dropwithreset;
 
@@ -2392,7 +2408,7 @@ tcp_dooptions(struct tcpcb *tp, u_char *
        }
 
        if ((sigp ? TF_SIGNATURE : 0) ^ (tp->t_flags & TF_SIGNATURE)) {
-               tcpstat.tcps_rcvbadsig++;
+               tcpc_inc(tcpc_rcvbadsig);
                return (-1);
        }
 
@@ -2400,7 +2416,7 @@ tcp_dooptions(struct tcpcb *tp, u_char *
                char sig[16];
 
                if (tdb == NULL) {
-                       tcpstat.tcps_rcvbadsig++;
+                       tcpc_inc(tcpc_rcvbadsig);
                        return (-1);
                }
 
@@ -2408,11 +2424,11 @@ tcp_dooptions(struct tcpcb *tp, u_char *
                        return (-1);
 
                if (timingsafe_bcmp(sig, sigp, 16)) {
-                       tcpstat.tcps_rcvbadsig++;
+                       tcpc_inc(tcpc_rcvbadsig);
                        return (-1);
                }
 
-               tcpstat.tcps_rcvgoodsig++;
+               tcpc_inc(tcpc_rcvgoodsig);
        }
 #endif /* TCP_SIGNATURE */
 
@@ -2550,7 +2566,7 @@ tcp_sack_option(struct tcpcb *tp, struct
        /* Note: TCPOLEN_SACK must be 2*sizeof(tcp_seq) */
        tmp_cp = cp + 2;
        tmp_olen = optlen - 2;
-       tcpstat.tcps_sack_rcv_opts++;
+       tcpc_inc(tcpc_sack_rcv_opts);
        if (tp->snd_numholes < 0)
                tp->snd_numholes = 0;
        if (tp->t_maxseg == 0)
@@ -2870,7 +2886,7 @@ tcp_xmit_timer(struct tcpcb *tp, int rtt
        else if (rtt > TCP_RTT_MAX)
                rtt = TCP_RTT_MAX;
 
-       tcpstat.tcps_rttupdated++;
+       tcpc_inc(tcpc_rttupdated);
        if (tp->t_srtt != 0) {
                /*
                 * delta is fixed point with 2 (TCP_RTT_BASE_SHIFT) bits
@@ -3272,6 +3288,7 @@ int       tcp_syn_bucket_limit = 3*TCP_SYN_BUC
 int    tcp_syn_use_limit = 100000;
 
 struct syn_cache_set tcp_syn_cache[2];
+struct tcpscstat syn_cache_stat;
 int tcp_syn_cache_active;
 
 #define SYN_HASH(sa, sp, dp, rand) \
@@ -3412,7 +3429,7 @@ syn_cache_insert(struct syn_cache *sc, s
                        }
                }
                arc4random_buf(set->scs_random, sizeof(set->scs_random));
-               tcpstat.tcps_sc_seedrandom++;
+               syn_cache_stat.tcpsc_seedrandom++;
        }
 
        SYN_HASHALL(sc->sc_hash, &sc->sc_src.sa, &sc->sc_dst.sa,
@@ -3425,7 +3442,7 @@ syn_cache_insert(struct syn_cache *sc, s
         * limit or the total cache size limit.
         */
        if (scp->sch_length >= tcp_syn_bucket_limit) {
-               tcpstat.tcps_sc_bucketoverflow++;
+               syn_cache_stat.tcpsc_bucketoverflow++;
                /*
                 * Someone might attack our bucket hash function.  Reseed
                 * with random as soon as the passive syn cache gets empty.
@@ -3449,7 +3466,7 @@ syn_cache_insert(struct syn_cache *sc, s
        } else if (set->scs_count >= tcp_syn_cache_limit) {
                struct syn_cache_head *scp2, *sce;
 
-               tcpstat.tcps_sc_overflowed++;
+               syn_cache_stat.tcpsc_overflowed++;
                /*
                 * The cache is full.  Toss the oldest entry in the
                 * first non-empty bucket we can find.
@@ -3499,7 +3516,7 @@ syn_cache_insert(struct syn_cache *sc, s
        set->scs_count++;
        set->scs_use--;
 
-       tcpstat.tcps_sc_added++;
+       syn_cache_stat.tcpsc_added++;
 
        /*
         * If the active cache has exceeded its use limit and
@@ -3543,7 +3560,7 @@ syn_cache_timer(void *arg)
        if (sc->sc_rxttot >= tcptv_keep_init)
                goto dropit;
 
-       tcpstat.tcps_sc_retransmitted++;
+       syn_cache_stat.tcpsc_retransmitted++;
        (void) syn_cache_respond(sc, NULL);
 
        /* Advance the timer back-off. */
@@ -3554,7 +3571,7 @@ syn_cache_timer(void *arg)
        return;
 
  dropit:
-       tcpstat.tcps_sc_timed_out++;
+       syn_cache_stat.tcpsc_timed_out++;
        syn_cache_rm(sc);
        syn_cache_put(sc);
        splx(s);
@@ -3835,7 +3852,7 @@ syn_cache_get(struct sockaddr *src, stru
 #ifdef TCP_ECN
        if (sc->sc_flags & SCF_ECN_PERMIT) {
                tp->t_flags |= TF_ECN_PERMIT;
-               tcpstat.tcps_ecn_accepts++;
+               tcpc_inc(tcpc_ecn_accepts);
        }
 #endif
 #ifdef TCP_SACK
@@ -3850,7 +3867,7 @@ syn_cache_get(struct sockaddr *src, stru
        tp->t_state = TCPS_SYN_RECEIVED;
        tp->t_rcvtime = tcp_now;
        TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init);
-       tcpstat.tcps_accepts++;
+       tcpc_inc(tcpc_accepts);
 
        tcp_mss(tp, sc->sc_peermaxseg);  /* sets t_maxseg */
        if (sc->sc_peermaxseg)
@@ -3872,7 +3889,7 @@ syn_cache_get(struct sockaddr *src, stru
                tp->rcv_adv = tp->rcv_nxt + sc->sc_win;
        tp->last_ack_sent = tp->rcv_nxt;
 
-       tcpstat.tcps_sc_completed++;
+       syn_cache_stat.tcpsc_completed++;
        syn_cache_put(sc);
        return (so);
 
@@ -3884,7 +3901,7 @@ abort:
        if (so != NULL)
                (void) soabort(so);
        syn_cache_put(sc);
-       tcpstat.tcps_sc_aborted++;
+       syn_cache_stat.tcpsc_aborted++;
        return ((struct socket *)(-1));
 }
 
@@ -3913,7 +3930,7 @@ syn_cache_reset(struct sockaddr *src, st
        }
        syn_cache_rm(sc);
        splx(s);
-       tcpstat.tcps_sc_reset++;
+       syn_cache_stat.tcpsc_reset++;
        syn_cache_put(sc);
 }
 
@@ -3952,7 +3969,7 @@ syn_cache_unreach(struct sockaddr *src, 
 
        syn_cache_rm(sc);
        splx(s);
-       tcpstat.tcps_sc_unreach++;
+       syn_cache_stat.tcpsc_unreach++;
        syn_cache_put(sc);
 }
 
@@ -3980,6 +3997,8 @@ syn_cache_add(struct sockaddr *src, stru
        struct syn_cache *sc;
        struct syn_cache_head *scp;
        struct mbuf *ipopts;
+       struct counters_ref r;
+       uint64_t *tcpc;
 
        tp = sototcpcb(so);
 
@@ -4035,7 +4054,7 @@ syn_cache_add(struct sockaddr *src, stru
         */
        if ((sc = syn_cache_lookup(src, dst, &scp, sotoinpcb(so)->inp_rtableid))
            != NULL) {
-               tcpstat.tcps_sc_dupesyn++;
+               syn_cache_stat.tcpsc_dupesyn++;
                if (ipopts) {
                        /*
                         * If we were remembering a previous source route,
@@ -4047,8 +4066,10 @@ syn_cache_add(struct sockaddr *src, stru
                }
                sc->sc_timestamp = tb.ts_recent;
                if (syn_cache_respond(sc, m) == 0) {
-                       tcpstat.tcps_sndacks++;
-                       tcpstat.tcps_sndtotal++;
+                       tcpc = counters_enter(&r, tcpcounters);
+                       tcpc[tcpc_sndacks]++;
+                       tcpc[tcpc_sndtotal]++;
+                       counters_leave(&r, tcpcounters);
                }
                return (0);
        }
@@ -4135,11 +4156,13 @@ syn_cache_add(struct sockaddr *src, stru
        sc->sc_tp = tp;
        if (syn_cache_respond(sc, m) == 0) {
                syn_cache_insert(sc, tp);
-               tcpstat.tcps_sndacks++;
-               tcpstat.tcps_sndtotal++;
+               tcpc = counters_enter(&r, tcpcounters);
+               tcpc[tcpc_sndacks]++;
+               tcpc[tcpc_sndtotal]++;
+               counters_leave(&r, tcpcounters);
        } else {
                syn_cache_put(sc);
-               tcpstat.tcps_sc_dropped++;
+               syn_cache_stat.tcpsc_dropped++;
        }
 
        return (0);
Index: netinet/tcp_output.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_output.c,v
retrieving revision 1.118
diff -u -p -r1.118 tcp_output.c
--- netinet/tcp_output.c        19 Jul 2016 21:28:43 -0000      1.118
+++ netinet/tcp_output.c        17 Aug 2016 11:51:55 -0000
@@ -211,6 +211,8 @@ tcp_output(struct tcpcb *tp)
        u_char *opt = (u_char *)optbuf;
        unsigned int optlen, hdrlen, packetlen;
        int idle, sendalot = 0;
+       struct counters_ref r;
+       uint64_t *tcpc;
 #ifdef TCP_SACK
        int i, sack_rxmit = 0;
        struct sackhole *p;
@@ -641,7 +643,7 @@ send:
                int count = 0;  /* actual number of SACKs inserted */
                int maxsack = (MAX_TCPOPTLEN - (optlen + 4))/TCPOLEN_SACK;
 
-               tcpstat.tcps_sack_snd_opts++;
+               tcpc_inc(tcpc_sack_snd_opts);
                maxsack = min(maxsack, TCP_MAX_SACK);
                for (i = 0; (i < tp->rcv_numsacks && count < maxsack); i++) {
                        struct sackblk sack = tp->sackblks[i];
@@ -684,15 +686,17 @@ send:
         * the template for sends on this connection.
         */
        if (len) {
+               tcpc = counters_enter(&r, tcpcounters);
                if (tp->t_force && len == 1)
-                       tcpstat.tcps_sndprobe++;
+                       tcpc[tcpc_sndprobe]++;
                else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) {
-                       tcpstat.tcps_sndrexmitpack++;
-                       tcpstat.tcps_sndrexmitbyte += len;
+                       tcpc[tcpc_sndrexmitpack]++;
+                       tcpc[tcpc_sndrexmitbyte] += len;
                } else {
-                       tcpstat.tcps_sndpack++;
-                       tcpstat.tcps_sndbyte += len;
+                       tcpc[tcpc_sndpack]++;
+                       tcpc[tcpc_sndbyte] += len;
                }
+               counters_leave(&r, tcpcounters);
 #ifdef notyet
                if ((m = m_copypack(so->so_snd.sb_mb, off,
                    (int)len, max_linkhdr + hdrlen)) == 0) {
@@ -745,14 +749,16 @@ send:
                if (off + len == so->so_snd.sb_cc && !soissending(so))
                        flags |= TH_PUSH;
        } else {
+               tcpc = counters_enter(&r, tcpcounters);
                if (tp->t_flags & TF_ACKNOW)
-                       tcpstat.tcps_sndacks++;
+                       tcpc[tcpc_sndacks]++;
                else if (flags & (TH_SYN|TH_FIN|TH_RST))
-                       tcpstat.tcps_sndctrl++;
+                       tcpc[tcpc_sndctrl]++;
                else if (SEQ_GT(tp->snd_up, tp->snd_una))
-                       tcpstat.tcps_sndurg++;
+                       tcpc[tcpc_sndurg]++;
                else
-                       tcpstat.tcps_sndwinup++;
+                       tcpc[tcpc_sndwinup]++;
+               counters_leave(&r, tcpcounters);
 
                MGETHDR(m, M_DONTWAIT, MT_HEADER);
                if (m != NULL && max_linkhdr + hdrlen > MHLEN) {
@@ -823,8 +829,7 @@ send:
 #if defined(TCP_SACK) && defined(TCP_FACK)
                tp->retran_data += len;
 #endif /* TCP_FACK */
-               tcpstat.tcps_sack_rexmits++;
-               tcpstat.tcps_sack_rexmit_bytes += len;
+               tcpc_pkt(tcpc_sack_rexmits, tcpc_sack_rexmit_bytes, len);
        }
 #endif /* TCP_SACK */
 
@@ -841,7 +846,7 @@ send:
                 */
                if (tp->t_flags & TF_RCVD_CE) {
                        flags |= TH_ECE;
-                       tcpstat.tcps_ecn_sndece++;
+                       tcpc_inc(tcpc_ecn_sndece);
                }
                if (!(tp->t_flags & TF_DISABLE_ECN)) {
                        /*
@@ -862,7 +867,7 @@ send:
                    (tp->t_flags & TF_SEND_CWR)) {
                        flags |= TH_CWR;
                        tp->t_flags &= ~TF_SEND_CWR;
-                       tcpstat.tcps_ecn_sndcwr++;
+                       tcpc_inc(tcpc_ecn_sndcwr);
                }
        }
 #endif
@@ -982,7 +987,7 @@ send:
                        if (tp->t_rtttime == 0) {
                                tp->t_rtttime = tcp_now;
                                tp->t_rtseq = startseq;
-                               tcpstat.tcps_segstimed++;
+                               tcpc_inc(tcpc_segstimed);
                        }
                }
 
@@ -1073,7 +1078,7 @@ send:
                        /* don't set ECT */
                } else {
                        needect = 1;
-                       tcpstat.tcps_ecn_sndect++;
+                       tcpc_inc(tcpc_ecn_sndect);
                }
        }
 #endif
@@ -1172,9 +1177,11 @@ out:
        if (packetlen > tp->t_pmtud_mtu_sent)
                tp->t_pmtud_mtu_sent = packetlen;
 
-       tcpstat.tcps_sndtotal++;
+       tcpc = counters_enter(&r, tcpcounters);
+       tcpc[tcpc_sndtotal]++;
        if (tp->t_flags & TF_DELACK)
-               tcpstat.tcps_delack++;
+               tcpc[tcpc_delack]++;
+       counters_leave(&r, tcpcounters);
 
        /*
         * Data sent (as far as we can tell).
Index: netinet/tcp_subr.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_subr.c,v
retrieving revision 1.151
diff -u -p -r1.151 tcp_subr.c
--- netinet/tcp_subr.c  7 Mar 2016 18:44:00 -0000       1.151
+++ netinet/tcp_subr.c  17 Aug 2016 11:51:55 -0000
@@ -131,7 +131,7 @@ struct pool tcpqe_pool;
 struct pool sackhl_pool;
 #endif
 
-struct tcpstat tcpstat;                /* tcp statistics */
+struct cpumem *tcpcounters;            /* tcp statistics */
 tcp_seq  tcp_iss;
 
 /*
@@ -140,6 +140,7 @@ tcp_seq  tcp_iss;
 void
 tcp_init(void)
 {
+       tcpcounters = counters_alloc(tcpc_ncounters, M_PCB);
        tcp_iss = 1;            /* wrong */
        pool_init(&tcpcb_pool, sizeof(struct tcpcb), 0, 0, 0, "tcpcb", NULL);
        pool_init(&tcpqe_pool, sizeof(struct tcpqent), 0, 0, 0, "tcpqe", NULL);
@@ -498,9 +499,9 @@ tcp_drop(tp, errno)
        if (TCPS_HAVERCVDSYN(tp->t_state)) {
                tp->t_state = TCPS_CLOSED;
                (void) tcp_output(tp);
-               tcpstat.tcps_drops++;
+               tcpc_inc(tcpc_drops);
        } else
-               tcpstat.tcps_conndrops++;
+               tcpc_inc(tcpc_conndrops);
        if (errno == ETIMEDOUT && tp->t_softerror)
                errno = tp->t_softerror;
        so->so_error = errno;
@@ -559,7 +560,7 @@ tcp_reaper(void *arg)
        s = splsoftnet();
        pool_put(&tcpcb_pool, tp);
        splx(s);
-       tcpstat.tcps_closed++;
+       tcpc_inc(tcpc_closed);
 }
 
 int
Index: netinet/tcp_timer.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_timer.c,v
retrieving revision 1.49
diff -u -p -r1.49 tcp_timer.c
--- netinet/tcp_timer.c 7 Mar 2016 18:44:00 -0000       1.49
+++ netinet/tcp_timer.c 17 Aug 2016 11:51:55 -0000
@@ -234,12 +234,12 @@ tcp_timer_rexmt(void *arg)
 #endif
        if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
                tp->t_rxtshift = TCP_MAXRXTSHIFT;
-               tcpstat.tcps_timeoutdrop++;
+               tcpc_inc(tcpc_timeoutdrop);
                (void)tcp_drop(tp, tp->t_softerror ?
                    tp->t_softerror : ETIMEDOUT);
                goto out;
        }
-       tcpstat.tcps_rexmttimeo++;
+       tcpc_inc(tcpc_rexmttimeo);
        rto = TCP_REXMTVAL(tp);
        if (rto < tp->t_rttmin)
                rto = tp->t_rttmin;
@@ -371,7 +371,7 @@ tcp_timer_rexmt(void *arg)
                tp->t_flags |= TF_SEND_CWR;
 #endif
 #if 1 /* TCP_ECN */
-               tcpstat.tcps_cwr_timeout++;
+               tcpc_inc(tcpc_cwr_timeout);
 #endif
        }
        (void) tcp_output(tp);
@@ -393,7 +393,7 @@ tcp_timer_persist(void *arg)
                splx(s);
                return;
        }
-       tcpstat.tcps_persisttimeo++;
+       tcpc_inc(tcpc_persisttimeo);
        /*
         * Hack: if the peer is dead/unreachable, we do not
         * time out if the window is closed.  After a full
@@ -407,7 +407,7 @@ tcp_timer_persist(void *arg)
        if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
            ((tcp_now - tp->t_rcvtime) >= tcp_maxpersistidle ||
            (tcp_now - tp->t_rcvtime) >= rto * tcp_totbackoff)) {
-               tcpstat.tcps_persistdrop++;
+               tcpc_inc(tcpc_persistdrop);
                tp = tcp_drop(tp, ETIMEDOUT);
                goto out;
        }
@@ -431,7 +431,7 @@ tcp_timer_keep(void *arg)
                return;
        }
 
-       tcpstat.tcps_keeptimeo++;
+       tcpc_inc(tcpc_keeptimeo);
        if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
                goto dropit;
        if ((tcp_always_keepalive ||
@@ -452,7 +452,7 @@ tcp_timer_keep(void *arg)
                 * by the protocol spec, this requires the
                 * correspondent TCP to respond.
                 */
-               tcpstat.tcps_keepprobe++;
+               tcpc_inc(tcpc_keepprobe);
                tcp_respond(tp, mtod(tp->t_template, caddr_t),
                    NULL, tp->rcv_nxt, tp->snd_una - 1, 0, 0);
                TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl);
@@ -463,7 +463,7 @@ tcp_timer_keep(void *arg)
        return;
 
  dropit:
-       tcpstat.tcps_keepdrops++;
+       tcpc_inc(tcpc_keepdrops);
        tp = tcp_drop(tp, ETIMEDOUT);
 
        splx(s);
Index: netinet/tcp_usrreq.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.134
diff -u -p -r1.134 tcp_usrreq.c
--- netinet/tcp_usrreq.c        20 Jul 2016 19:57:53 -0000      1.134
+++ netinet/tcp_usrreq.c        17 Aug 2016 11:51:55 -0000
@@ -116,6 +116,7 @@ int *tcpctl_vars[TCPCTL_MAXID] = TCPCTL_
 struct inpcbtable tcbtable;
 
 int tcp_ident(void *, size_t *, void *, size_t, int);
+int tcp_stats(void *, size_t *, void *, size_t);
 
 /*
  * Process a TCP user request for TCP tb.  If this is a send request
@@ -288,7 +289,7 @@ tcp_usrreq(so, req, m, nam, control, p)
                tcp_rscale(tp, sb_max);
 
                soisconnecting(so);
-               tcpstat.tcps_connattempt++;
+               tcpc_inc(tcpc_connattempt);
                tp->t_state = TCPS_SYN_SENT;
                TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init);
                tcp_set_iss_tsm(tp);
@@ -825,7 +826,7 @@ tcp_ident(void *oldp, size_t *oldlenp, v
        }
 
        if (inp == NULL) {
-               ++tcpstat.tcps_pcbhashmiss;
+               tcpc_inc(tcpc_pcbhashmiss);
                switch (tir.faddr.ss_family) {
 #ifdef INET6
                case AF_INET6:
@@ -934,28 +935,7 @@ tcp_sysctl(name, namelen, oldp, oldlenp,
 #endif
 
        case TCPCTL_STATS:
-               if (newp != NULL)
-                       return (EPERM);
-               {
-                       struct syn_cache_set *set;
-                       int i;
-
-                       set = &tcp_syn_cache[tcp_syn_cache_active];
-                       tcpstat.tcps_sc_hash_size = set->scs_size;
-                       tcpstat.tcps_sc_entry_count = set->scs_count;
-                       tcpstat.tcps_sc_entry_limit = tcp_syn_cache_limit;
-                       tcpstat.tcps_sc_bucket_maxlen = 0;
-                       for (i = 0; i < set->scs_size; i++) {
-                               if (tcpstat.tcps_sc_bucket_maxlen <
-                                   set->scs_buckethead[i].sch_length)
-                                       tcpstat.tcps_sc_bucket_maxlen =
-                                           set->scs_buckethead[i].sch_length;
-                       }
-                       tcpstat.tcps_sc_bucket_limit = tcp_syn_bucket_limit;
-                       tcpstat.tcps_sc_uses_left = set->scs_use;
-               }
-               return (sysctl_struct(oldp, oldlenp, newp, newlen,
-                   &tcpstat, sizeof(tcpstat)));
+               return tcp_stats(oldp, oldlenp, newp, newlen);
 
        case TCPCTL_SYN_USE_LIMIT:
                error = sysctl_int(oldp, oldlenp, newp, newlen,
@@ -1002,6 +982,152 @@ tcp_sysctl(name, namelen, oldp, oldlenp,
                return (ENOPROTOOPT);
        }
        /* NOTREACHED */
+}
+
+int
+tcp_stats(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
+{
+       struct tcpstat tcpstat;
+       uint64_t tcpc[tcpc_ncounters];
+       extern struct tcpscstat syn_cache_stat;
+       struct syn_cache_set *set;
+       int i;
+
+       if (newp != NULL)
+               return (EPERM);
+
+       counters_read(tcpcounters, tcpc, nitems(tcpc));
+
+#define TCP_C2S(n) tcpstat.tcps_##n = tcpc[tcpc_##n]
+#define TCP_SC2S(n) tcpstat.tcps_sc_##n = syn_cache_stat.tcpsc_##n
+
+       TCP_C2S(connattempt);
+       TCP_C2S(accepts);
+       TCP_C2S(connects);
+       TCP_C2S(drops);
+       TCP_C2S(conndrops);
+       TCP_C2S(closed);
+       TCP_C2S(segstimed);
+       TCP_C2S(rttupdated);
+       TCP_C2S(delack);
+       TCP_C2S(timeoutdrop);
+       TCP_C2S(rexmttimeo);
+       TCP_C2S(persisttimeo);
+       TCP_C2S(persistdrop);
+       TCP_C2S(keeptimeo);
+       TCP_C2S(keepprobe);
+       TCP_C2S(keepdrops);
+
+       TCP_C2S(sndtotal);
+       TCP_C2S(sndpack);
+       TCP_C2S(sndbyte);
+       TCP_C2S(sndrexmitpack);
+       TCP_C2S(sndrexmitbyte);
+       TCP_C2S(sndrexmitfast);
+       TCP_C2S(sndacks);
+       TCP_C2S(sndprobe);
+       TCP_C2S(sndurg);
+       TCP_C2S(sndwinup);
+       TCP_C2S(sndctrl);
+
+       TCP_C2S(rcvtotal);
+       TCP_C2S(rcvpack);
+       TCP_C2S(rcvbyte);
+       TCP_C2S(rcvbadsum);
+       TCP_C2S(rcvbadoff);
+       TCP_C2S(rcvmemdrop);
+       TCP_C2S(rcvnosec);
+       TCP_C2S(rcvshort);
+       TCP_C2S(rcvduppack);
+       TCP_C2S(rcvdupbyte);
+       TCP_C2S(rcvpartduppack);
+       TCP_C2S(rcvpartdupbyte);
+       TCP_C2S(rcvoopack);
+       TCP_C2S(rcvoobyte);
+       TCP_C2S(rcvpackafterwin);
+       TCP_C2S(rcvbyteafterwin);
+       TCP_C2S(rcvafterclose);
+       TCP_C2S(rcvwinprobe);
+       TCP_C2S(rcvdupack);
+       TCP_C2S(rcvacktoomuch);
+       TCP_C2S(rcvacktooold);
+       TCP_C2S(rcvackpack);
+       TCP_C2S(rcvackbyte);
+       TCP_C2S(rcvwinupd);
+       TCP_C2S(pawsdrop);
+       TCP_C2S(predack);
+       TCP_C2S(preddat);
+
+       TCP_C2S(pcbhashmiss);
+       TCP_C2S(noport);
+       TCP_C2S(badsyn);
+       TCP_C2S(dropsyn);
+
+       TCP_C2S(rcvbadsig);
+       TCP_C2S(rcvgoodsig);
+       TCP_C2S(inswcsum);
+       TCP_C2S(outswcsum);
+
+       /* ECN stats */
+       TCP_C2S(ecn_accepts);
+       TCP_C2S(ecn_rcvece);
+       TCP_C2S(ecn_rcvcwr);
+       TCP_C2S(ecn_rcvce);
+       TCP_C2S(ecn_sndect);
+       TCP_C2S(ecn_sndece);
+       TCP_C2S(ecn_sndcwr);
+       TCP_C2S(cwr_ecn);
+       TCP_C2S(cwr_frecovery);
+       TCP_C2S(cwr_timeout);
+
+       /* These statistics deal with the SYN cache. */
+       TCP_SC2S(added);
+       TCP_SC2S(completed);
+       TCP_SC2S(timed_out);
+       TCP_SC2S(overflowed);
+       TCP_SC2S(reset);
+       TCP_SC2S(unreach);
+       TCP_SC2S(bucketoverflow);
+       TCP_SC2S(aborted);
+       TCP_SC2S(dupesyn);
+       TCP_SC2S(dropped);
+       TCP_SC2S(collisions);
+       TCP_SC2S(retransmitted);
+       TCP_SC2S(seedrandom);
+       TCP_SC2S(hash_size);
+       TCP_SC2S(entry_count);
+       TCP_SC2S(bucket_maxlen);
+       TCP_SC2S(bucket_limit);
+       TCP_SC2S(uses_left);
+
+       TCP_C2S(conndrained);
+
+       TCP_C2S(sack_recovery_episode);
+       TCP_C2S(sack_rexmits);
+       TCP_C2S(sack_rexmit_bytes);
+       TCP_C2S(sack_rcv_opts);
+       TCP_C2S(sack_snd_opts);
+
+#undef TCP_C2S
+#undef TCP_SC2S
+
+       set = &tcp_syn_cache[tcp_syn_cache_active];
+       tcpstat.tcps_sc_hash_size = set->scs_size;
+       tcpstat.tcps_sc_entry_count = set->scs_count;
+       tcpstat.tcps_sc_entry_limit = tcp_syn_cache_limit;
+       tcpstat.tcps_sc_bucket_maxlen = 0;
+       for (i = 0; i < set->scs_size; i++) {
+               if (tcpstat.tcps_sc_bucket_maxlen <
+                   set->scs_buckethead[i].sch_length) {
+                       tcpstat.tcps_sc_bucket_maxlen =
+                           set->scs_buckethead[i].sch_length;
+               }
+       }
+       tcpstat.tcps_sc_bucket_limit = tcp_syn_bucket_limit;
+       tcpstat.tcps_sc_uses_left = set->scs_use;
+
+       return (sysctl_struct(oldp, oldlenp, newp, newlen,
+           &tcpstat, sizeof(tcpstat)));
 }
 
 /*
Index: netinet/tcp_var.h
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_var.h,v
retrieving revision 1.115
diff -u -p -r1.115 tcp_var.h
--- netinet/tcp_var.h   20 Jul 2016 19:57:53 -0000      1.115
+++ netinet/tcp_var.h   17 Aug 2016 11:51:55 -0000
@@ -360,6 +360,146 @@ struct syn_cache_set {
  * Many of these should be kept per connection,
  * but that's inconvenient at the moment.
  */
+
+#ifdef _KERNEL
+enum tcpcounters {
+       tcpc_connattempt,       /* connections initiated */
+       tcpc_accepts,           /* connections accepted */
+       tcpc_connects,          /* connections established */
+       tcpc_drops,             /* connections dropped */
+       tcpc_conndrops,         /* embryonic connections dropped */
+       tcpc_closed,            /* conn. closed (includes drops) */
+       tcpc_segstimed,         /* segs where we tried to get rtt */
+       tcpc_rttupdated,        /* times we succeeded */
+       tcpc_delack,            /* delayed acks sent */
+       tcpc_timeoutdrop,       /* conn. dropped in rxmt timeout */
+       tcpc_rexmttimeo,        /* retransmit timeouts */
+       tcpc_persisttimeo,      /* persist timeouts */
+       tcpc_persistdrop,       /* connections dropped in persist */
+       tcpc_keeptimeo,         /* keepalive timeouts */
+       tcpc_keepprobe,         /* keepalive probes sent */
+       tcpc_keepdrops,         /* connections dropped in keepalive */
+
+       tcpc_sndtotal,          /* total packets sent */
+       tcpc_sndpack,           /* data packets sent */
+       tcpc_sndbyte,           /* data bytes sent */
+       tcpc_sndrexmitpack,     /* data packets retransmitted */
+       tcpc_sndrexmitbyte,     /* data bytes retransmitted */
+       tcpc_sndrexmitfast,     /* Fast retransmits */
+       tcpc_sndacks,           /* ack-only packets sent */
+       tcpc_sndprobe,          /* window probes sent */
+       tcpc_sndurg,            /* packets sent with URG only */
+       tcpc_sndwinup,          /* window update-only packets sent */
+       tcpc_sndctrl,           /* control (SYN|FIN|RST) packets sent */
+
+       tcpc_rcvtotal,          /* total packets received */
+       tcpc_rcvpack,           /* packets received in sequence */
+       tcpc_rcvbyte,           /* bytes received in sequence */
+       tcpc_rcvbadsum,         /* packets received with cksum errs */
+       tcpc_rcvbadoff,         /* packets received with bad offset */
+       tcpc_rcvmemdrop,        /* packets dropped for lack of memory */
+       tcpc_rcvnosec,          /* packets dropped for lack of ipsec */
+       tcpc_rcvshort,          /* packets received too short */
+       tcpc_rcvduppack,        /* duplicate-only packets received */
+       tcpc_rcvdupbyte,        /* duplicate-only bytes received */
+       tcpc_rcvpartduppack,    /* packets with some duplicate data */
+       tcpc_rcvpartdupbyte,    /* dup. bytes in part-dup. packets */
+       tcpc_rcvoopack,         /* out-of-order packets received */
+       tcpc_rcvoobyte,         /* out-of-order bytes received */
+       tcpc_rcvpackafterwin,   /* packets with data after window */
+       tcpc_rcvbyteafterwin,   /* bytes rcvd after window */
+       tcpc_rcvafterclose,     /* packets rcvd after "close" */
+       tcpc_rcvwinprobe,       /* rcvd window probe packets */
+       tcpc_rcvdupack,         /* rcvd duplicate acks */
+       tcpc_rcvacktoomuch,     /* rcvd acks for unsent data */
+       tcpc_rcvacktooold,      /* rcvd acks for old data */
+       tcpc_rcvackpack,        /* rcvd ack packets */
+       tcpc_rcvackbyte,        /* bytes acked by rcvd acks */
+       tcpc_rcvwinupd,         /* rcvd window update packets */
+       tcpc_pawsdrop,          /* segments dropped due to PAWS */
+       tcpc_predack,           /* times hdr predict ok for acks */
+       tcpc_preddat,           /* times hdr predict ok for data pkts */
+
+       tcpc_pcbhashmiss,       /* input packets missing pcb hash */
+       tcpc_noport,            /* no socket on port */
+       tcpc_badsyn,            /* SYN packet with src==dst rcv'ed */
+       tcpc_dropsyn,           /* SYN packet dropped */
+
+       tcpc_rcvbadsig,         /* rcvd bad/missing TCP signatures */
+       tcpc_rcvgoodsig,        /* rcvd good TCP signatures */
+       tcpc_inswcsum,          /* input software-checksummed packets */
+       tcpc_outswcsum,         /* output software-checksummed packets */
+
+       /* ECN stats */
+       tcpc_ecn_accepts,       /* ecn connections accepted */
+       tcpc_ecn_rcvece,        /* # of rcvd ece */
+       tcpc_ecn_rcvcwr,        /* # of rcvd cwr */
+       tcpc_ecn_rcvce,         /* # of rcvd ce in ip header */
+       tcpc_ecn_sndect,        /* # of ect sent */
+       tcpc_ecn_sndece,        /* # of ece sent */
+       tcpc_ecn_sndcwr,        /* # of cwr sent */
+       tcpc_cwr_ecn,           /* # of cwnd reduced by ecn */
+       tcpc_cwr_frecovery,     /* # of cwnd reduced by fastrecovery */
+       tcpc_cwr_timeout,       /* # of cwnd reduced by timeout */
+
+       tcpc_conndrained,       /* # of connections drained */
+
+       tcpc_sack_recovery_episode,     /* SACK recovery episodes */
+       tcpc_sack_rexmits,      /* SACK rexmit segments */
+       tcpc_sack_rexmit_bytes, /* SACK rexmit bytes */
+       tcpc_sack_rcv_opts,     /* SACK options received */
+       tcpc_sack_snd_opts,     /* SACK options sent */
+
+       tcpc_ncounters
+};
+
+/* These statistics deal with the SYN cache. */
+struct tcpscstat {
+       uint64_t tcpsc_added;           /* # of entries added */
+       uint64_t tcpsc_completed;       /* # of connections completed */
+       uint64_t tcpsc_timed_out;       /* # of entries timed out */
+       uint64_t tcpsc_overflowed;      /* # dropped due to overflow */
+       uint64_t tcpsc_reset;           /* # dropped due to RST */
+       uint64_t tcpsc_unreach;         /* # dropped due to ICMP unreach */
+       uint64_t tcpsc_bucketoverflow;  /* # dropped due to bucket overflow */
+       uint64_t tcpsc_aborted;         /* # of entries aborted (no mem) */
+       uint64_t tcpsc_dupesyn;         /* # of duplicate SYNs received */
+       uint64_t tcpsc_dropped;         /* # of SYNs dropped (no route/mem) */
+       uint64_t tcpsc_collisions;      /* # of hash collisions */
+       uint64_t tcpsc_retransmitted;   /* # of retransmissions */
+       uint64_t tcpsc_seedrandom;      /* # of syn cache seeds with random */
+       uint64_t tcpsc_hash_size;       /* hash buckets in current syn cache */
+       uint64_t tcpsc_entry_count;     /* # of entries in current syn cache */
+       uint64_t tcpsc_entry_limit;     /* limit of syn cache entries */
+       uint64_t tcpsc_bucket_maxlen;   /* maximum # of entries in any bucket */
+       uint64_t tcpsc_bucket_limit;    /* limit of syn cache bucket list */
+       uint64_t tcpsc_uses_left;       /* use counter of current syn cache */
+};
+
+#define tcpc_inc(_c) do {                                              \
+       struct counters_ref r;                                          \
+       uint64_t *tcpc = counters_enter(&r, tcpcounters);               \
+       tcpc[_c]++;                                                     \
+       counters_leave(&r, tcpcounters);                                \
+} while (0)
+
+#define tcpc_add(_c, _v) do {                                          \
+       struct counters_ref r;                                          \
+       uint64_t *tcpc = counters_enter(&r, tcpcounters);               \
+       tcpc[_c] += (_v);                                               \
+       counters_leave(&r, tcpcounters);                                \
+} while (0)
+
+#define tcpc_pkt(_p, _b, _v) do {                                      \
+       struct counters_ref r;                                          \
+       uint64_t *tcpc = counters_enter(&r, tcpcounters);               \
+       tcpc[_p]++;                                                     \
+       tcpc[_b] += (_v);                                               \
+       counters_leave(&r, tcpcounters);                                \
+} while (0)
+
+#endif /* _KERNEL */
+
 struct tcpstat {
        u_int32_t tcps_connattempt;     /* connections initiated */
        u_int32_t tcps_accepts;         /* connections accepted */
@@ -566,9 +706,10 @@ struct tcp_ident_mapping {
 };
 
 #ifdef _KERNEL
+#include <sys/percpu.h>
 extern struct inpcbtable tcbtable;     /* head of queue of active tcpcb's */
-extern struct tcpstat tcpstat; /* tcp statistics */
-extern u_int32_t tcp_now;              /* for RFC 1323 timestamps */
+extern struct cpumem *tcpcounters;     /* tcp statistics */
+extern u_int32_t tcp_now;      /* for RFC 1323 timestamps */
 extern int tcp_do_rfc1323;     /* enabled/disabled? */
 extern int tcptv_keep_init;    /* time to keep alive the initial SYN packet */
 extern int tcp_mssdflt;        /* default maximum segment size */
Index: netinet6/ip6_output.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_output.c,v
retrieving revision 1.211
diff -u -p -r1.211 ip6_output.c
--- netinet6/ip6_output.c       1 Jul 2016 18:18:57 -0000       1.211
+++ netinet6/ip6_output.c       17 Aug 2016 11:51:55 -0000
@@ -2861,7 +2861,7 @@ in6_proto_cksum_out(struct mbuf *m, stru
                if (!ifp || !(ifp->if_capabilities & IFCAP_CSUM_TCPv6) ||
                    ip6->ip6_nxt != IPPROTO_TCP ||
                    ifp->if_bridgeport != NULL) {
-                       tcpstat.tcps_outswcsum++;
+                       tcpc_inc(tcpc_outswcsum);
                        in6_delayed_cksum(m, IPPROTO_TCP);
                        m->m_pkthdr.csum_flags &= ~M_TCP_CSUM_OUT; /* Clear */
                }
Index: sys/percpu.h
===================================================================
RCS file: sys/percpu.h
diff -N sys/percpu.h
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ sys/percpu.h        17 Aug 2016 11:51:55 -0000
@@ -0,0 +1,171 @@
+/*     $OpenBSD$ */
+
+/*
+ * Copyright (c) 2016 David Gwynne <[email protected]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _SYS_PERCPU_H_
+#define _SYS_PERCPU_H_
+
+#include <sys/atomic.h>
+
+#ifndef CACHELINESIZE
+#define CACHELINESIZE 64
+#endif
+
+#ifndef __upunused /* this should go in param.h */
+#ifdef MULTIPROCESSOR
+#define __upunused
+#else
+#define __upunused __attribute__((__unused__))
+#endif
+#endif
+
+struct cpumem {
+       void            *mem;
+};
+
+struct cpumem_iter {
+       unsigned int    cpu;
+} __upunused;
+
+struct counters_ref {
+       uint64_t         g;
+       uint64_t        *c;
+};
+
+#ifdef _KERNEL
+struct pool;
+
+struct cpumem  *cpumem_get(struct pool *);
+void            cpumem_put(struct pool *, struct cpumem *);
+
+struct cpumem  *cpumem_malloc(size_t, int);
+struct cpumem  *cpumem_realloc(struct cpumem *, size_t, int);
+void            cpumem_free(struct cpumem *, int, size_t);
+
+#ifdef MULTIPROCESSOR
+static inline void *
+cpumem_enter(struct cpumem *cm)
+{
+       return (cm[cpu_number()].mem);
+}
+
+static inline void
+cpumem_leave(struct cpumem *cm, void *mem)
+{
+       /* KDASSERT? */
+}
+
+void           *cpumem_first(struct cpumem_iter *, struct cpumem *);
+void           *cpumem_next(struct cpumem_iter *, struct cpumem *);
+
+#define CPUMEM_BOOT_MEMORY(_name, _sz)                                 \
+static struct {                                                        \
+       unsigned char   mem[_sz];                                       \
+       struct cpumem   cpumem;                                         \
+} __aligned(CACHELINESIZE) _name##_boot_cpumem = {                     \
+       .cpumem = { _name##_boot_cpumem.mem }                           \
+}
+
+#define CPUMEM_BOOT_INITIALIZER(_name)                                 \
+       { &_name##_boot_cpumem.cpumem }
+
+#else /* MULTIPROCESSOR */
+static inline void *
+cpumem_enter(struct cpumem *cm)
+{
+       return (cm);
+}
+
+static inline void
+cpumem_leave(struct cpumem *cm, void *mem)
+{
+       /* KDASSERT? */
+}
+
+static inline void *
+cpumem_first(struct cpumem_iter *i, struct cpumem *cm)
+{
+       return (cm);
+}
+
+static inline void *
+cpumem_next(struct cpumem_iter *i, struct cpumem *cm)
+{
+       return (NULL);
+}
+
+#define CPUMEM_BOOT_MEMORY(_name, _sz)                                 \
+static struct {                                                        \
+       unsigned char   mem[_sz];                                       \
+} _name##_boot_cpumem
+
+#define CPUMEM_BOOT_INITIALIZER(_name)                                 \
+       { (struct cpumem *)&_name##_boot_cpumem.mem }
+
+#endif /* MULTIPROCESSOR */
+
+#define CPUMEM_FOREACH(_var, _iter, _cpumem)                           \
+       for ((_var) = cpumem_first((_iter), (_cpumem));                 \
+           (_var) != NULL;                                             \
+           (_var) = cpumem_next((_iter), (_cpumem)))
+
+struct cpumem  *counters_alloc(unsigned int, int);
+struct cpumem  *counters_realloc(struct cpumem *, unsigned int, int);
+void            counters_free(struct cpumem *, int, unsigned int);
+void            counters_read(struct cpumem *, uint64_t *, unsigned int);
+void            counters_zero(struct cpumem *, unsigned int);
+
+#ifdef MULTIPROCESSOR
+static inline uint64_t *
+counters_enter(struct counters_ref *ref, struct cpumem *cm)
+{
+       ref->c = cpumem_enter(cm);
+       ref->g = ++(*ref->c); /* make the generation number odd */
+       return (ref->c + 1);
+}
+
+static inline void
+counters_leave(struct counters_ref *ref, struct cpumem *cm)
+{
+       membar_producer();
+       (*ref->c) = ++ref->g; /* make the generation number even again */
+       cpumem_leave(cm, ref->c);
+}
+#define COUNTERS_BOOT_MEMORY(_name, _n)                                \
+       CPUMEM_BOOT_MEMORY(_name, ((_n) + 1) * sizeof(uint64_t))
+#else
+static inline uint64_t *
+counters_enter(struct counters_ref *r, struct cpumem *cm)
+{
+       r->c = cpumem_enter(cm);
+       return (r->c);
+}
+
+static inline void
+counters_leave(struct counters_ref *r, struct cpumem *cm)
+{
+       cpumem_leave(cm, r->c);
+}
+
+#define COUNTERS_BOOT_MEMORY(_name, _n)                                \
+       CPUMEM_BOOT_MEMORY(_name, (_n) * sizeof(uint64_t))
+#endif
+
+#define COUNTERS_BOOT_INITIALIZER(_name)       CPUMEM_BOOT_INITIALIZER(_name)
+
+#endif /* _KERNEL */
+#endif /* _SYS_PERCPU_H_ */

Reply via email to