On Wed, Aug 17, 2016 at 09:13:04PM +1000, David Gwynne wrote:
> On Wed, Aug 17, 2016 at 08:27:19PM +1000, David Gwynne wrote:
> > On Thu, Aug 11, 2016 at 02:43:16PM +1000, David Gwynne wrote:
> > > ive been tinkering with per cpu memory in the kernel.
> >
> > mpi@ wanted to see another example of moving something to per cpu counters
> > and challenged me to do tcpstat.
> >
> > the diff below does that, but with a couple of warts.
> >
> > firstly, it still exports a struct tcpstat to userland. it does
> > this by having an enum that shadows the entries in tcpstat which
> > is used as indexes in the array of counters. mapping the counter
> > array into tcpstat for export is ugly boilerplate.
> >
> > secondly, some tcp syn cache stats dont map well to per cpu counters.
> > to cope with that ive split the syn cache counters out into their
> > own struct. i personally would like to replace the syn cache hash
> > with a red black tree, which would happily solve that problem by
> > accident.
> >
> > could someone test and see if this has an impact on tcp speed?
> >
> > dlg
>
> the last diff was borked. this one should be cleaner.
and again, cos i missed procter's cksum changes.
sigh sigh
Index: conf/files
===================================================================
RCS file: /cvs/src/sys/conf/files,v
retrieving revision 1.624
diff -u -p -r1.624 files
--- conf/files 13 Aug 2016 20:35:57 -0000 1.624
+++ conf/files 17 Aug 2016 11:51:54 -0000
@@ -692,6 +692,7 @@ file kern/subr_evcount.c
file kern/subr_extent.c
file kern/subr_hibernate.c hibernate
file kern/subr_log.c
+file kern/subr_percpu.c
file kern/subr_poison.c diagnostic
file kern/subr_pool.c
file kern/dma_alloc.c
Index: kern/init_main.c
===================================================================
RCS file: /cvs/src/sys/kern/init_main.c,v
retrieving revision 1.253
diff -u -p -r1.253 init_main.c
--- kern/init_main.c 17 May 2016 23:28:03 -0000 1.253
+++ kern/init_main.c 17 Aug 2016 11:51:54 -0000
@@ -143,6 +143,7 @@ void init_exec(void);
void kqueue_init(void);
void taskq_init(void);
void pool_gc_pages(void *);
+void percpu_init(void);
extern char sigcode[], esigcode[], sigcoderet[];
#ifdef SYSCALL_DEBUG
@@ -354,6 +355,9 @@ main(void *framep)
/* Configure virtual memory system, set vm rlimits. */
uvm_init_limits(p);
+ /* Per CPU memory allocation */
+ percpu_init();
+
/* Initialize the file systems. */
#if defined(NFSSERVER) || defined(NFSCLIENT)
nfs_init(); /* initialize server/shared data */
Index: kern/subr_percpu.c
===================================================================
RCS file: kern/subr_percpu.c
diff -N kern/subr_percpu.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ kern/subr_percpu.c 17 Aug 2016 11:51:54 -0000
@@ -0,0 +1,326 @@
+/* $OpenBSD$ */
+
+/*
+ * Copyright (c) 2016 David Gwynne <[email protected]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/pool.h>
+#include <sys/malloc.h>
+#include <sys/types.h>
+
+#include <sys/percpu.h>
+
+#ifdef MULTIPROCESSOR
+struct pool cpumem_pl;
+
+void
+percpu_init(void)
+{
+ pool_init(&cpumem_pl, sizeof(struct cpumem) * ncpus, 0, 0,
+ PR_WAITOK, "percpumem", &pool_allocator_single);
+ pool_setipl(&cpumem_pl, IPL_NONE);
+}
+
+struct cpumem *
+cpumem_get(struct pool *pp)
+{
+ struct cpumem *cm;
+ unsigned int cpu;
+
+ cm = pool_get(&cpumem_pl, PR_WAITOK);
+
+ for (cpu = 0; cpu < ncpus; cpu++)
+ cm[cpu].mem = pool_get(pp, PR_WAITOK | PR_ZERO);
+
+ return (cm);
+}
+
+void
+cpumem_put(struct pool *pp, struct cpumem *cm)
+{
+ unsigned int cpu;
+
+ for (cpu = 0; cpu < ncpus; cpu++)
+ pool_put(pp, cm[cpu].mem);
+
+ pool_put(&cpumem_pl, cm);
+}
+
+struct cpumem *
+cpumem_malloc(size_t sz, int type)
+{
+ struct cpumem *cm;
+ unsigned int cpu;
+
+ sz = roundup(sz, CACHELINESIZE);
+
+ cm = pool_get(&cpumem_pl, PR_WAITOK);
+
+ for (cpu = 0; cpu < ncpus; cpu++)
+ cm[cpu].mem = malloc(sz, type, M_WAITOK | M_ZERO);
+
+ return (cm);
+}
+
+struct cpumem *
+cpumem_realloc(struct cpumem *bootcm, size_t sz, int type)
+{
+ struct cpumem *cm;
+ unsigned int cpu;
+
+ sz = roundup(sz, CACHELINESIZE);
+
+ cm = pool_get(&cpumem_pl, PR_WAITOK);
+
+ cm[0].mem = bootcm[0].mem;
+ for (cpu = 1; cpu < ncpus; cpu++)
+ cm[cpu].mem = malloc(sz, type, M_WAITOK | M_ZERO);
+
+ return (cm);
+}
+
+void
+cpumem_free(struct cpumem *cm, int type, size_t sz)
+{
+ unsigned int cpu;
+
+ sz = roundup(sz, CACHELINESIZE);
+
+ for (cpu = 0; cpu < ncpus; cpu++)
+ free(cm[cpu].mem, type, sz);
+
+ pool_put(&cpumem_pl, cm);
+}
+
+void *
+cpumem_first(struct cpumem_iter *i, struct cpumem *cm)
+{
+ i->cpu = 0;
+
+ return (cm[0].mem);
+}
+
+void *
+cpumem_next(struct cpumem_iter *i, struct cpumem *cm)
+{
+ unsigned int cpu = ++i->cpu;
+
+ if (cpu >= ncpus)
+ return (NULL);
+
+ return (cm[cpu].mem);
+}
+
+struct cpumem *
+counters_alloc(unsigned int n, int type)
+{
+ struct cpumem *cm;
+ struct cpumem_iter cmi;
+ uint64_t *counters;
+ unsigned int i;
+
+ KASSERT(n > 0);
+
+ n++; /* add space for a generation number */
+ cm = cpumem_malloc(n * sizeof(uint64_t), type);
+
+ CPUMEM_FOREACH(counters, &cmi, cm) {
+ for (i = 0; i < n; i++)
+ counters[i] = 0;
+ }
+
+ return (cm);
+}
+
+struct cpumem *
+counters_realloc(struct cpumem *cm, unsigned int n, int type)
+{
+ n++; /* the generation number */
+ return (cpumem_realloc(cm, n * sizeof(uint64_t), type));
+}
+
+void
+counters_free(struct cpumem *cm, int type, unsigned int n)
+{
+ n++; /* generation number */
+ cpumem_free(cm, type, n * sizeof(uint64_t));
+}
+
+void
+counters_read(struct cpumem *cm, uint64_t *output, unsigned int n)
+{
+ struct cpumem_iter cmi;
+ uint64_t *gen, *counters, *temp;
+ uint64_t enter, leave;
+ unsigned int i;
+
+ for (i = 0; i < n; i++)
+ output[i] = 0;
+
+ temp = mallocarray(n, sizeof(uint64_t), M_TEMP, M_WAITOK);
+
+ gen = cpumem_first(&cmi, cm);
+ do {
+ counters = gen + 1;
+
+ enter = *gen;
+ for (;;) {
+ /* the generation number is odd during an update */
+ while (enter & 1) {
+ yield();
+ membar_consumer();
+ enter = *gen;
+ }
+
+ for (i = 0; i < n; i++)
+ temp[i] = counters[i];
+
+ membar_consumer();
+ leave = *gen;
+
+ if (enter == leave)
+ break;
+
+ enter = leave;
+ }
+
+ for (i = 0; i < n; i++)
+ output[i] += temp[i];
+
+ gen = cpumem_next(&cmi, cm);
+ } while (gen != NULL);
+
+ free(temp, M_TEMP, n * sizeof(uint64_t));
+}
+
+void
+counters_zero(struct cpumem *cm, unsigned int n)
+{
+ struct cpumem_iter cmi;
+ uint64_t *counters;
+ unsigned int i;
+
+ n++; /* zero the generation numbers too */
+
+ counters = cpumem_first(&cmi, cm);
+ do {
+ for (i = 0; i < n; i++)
+ counters[i] = 0;
+
+ counters = cpumem_next(&cmi, cm);
+ } while (counters != NULL);
+}
+
+#else /* MULTIPROCESSOR */
+
+/*
+ * Uniprocessor implementation of per-CPU data structures.
+ *
+ * UP percpu memory is a single memory allocation cast to/from the
+ * cpumem struct. It is not scaled up to the size of a cacheline because
+ * there's no other cache to contend with.
+ */
+
+void
+percpu_init(void)
+{
+ /* nop */
+}
+
+struct cpumem *
+cpumem_get(struct pool *pp)
+{
+	return (pool_get(pp, PR_WAITOK | PR_ZERO));	/* match MP variant: zeroed */
+}
+
+void
+cpumem_put(struct pool *pp, struct cpumem *cm)
+{
+ pool_put(pp, cm);
+}
+
+struct cpumem *
+cpumem_malloc(size_t sz, int type)
+{
+	return (malloc(sz, type, M_WAITOK | M_ZERO));	/* match MP variant: zeroed */
+}
+
+struct cpumem *
+cpumem_realloc(struct cpumem *cm, size_t sz, int type)
+{
+ return (cm);
+}
+
+void
+cpumem_free(struct cpumem *cm, int type, size_t sz)
+{
+ free(cm, type, sz);
+}
+
+struct cpumem *
+counters_alloc(unsigned int n, int type)
+{
+ KASSERT(n > 0);
+
+ return (cpumem_malloc(n * sizeof(uint64_t), type));
+}
+
+struct cpumem *
+counters_realloc(struct cpumem *cm, unsigned int n, int type)
+{
+ /* this is unnecessary, but symmetrical */
+ return (cpumem_realloc(cm, n * sizeof(uint64_t), type));
+}
+
+void
+counters_free(struct cpumem *cm, int type, unsigned int n)
+{
+ cpumem_free(cm, type, n * sizeof(uint64_t));
+}
+
+void
+counters_read(struct cpumem *cm, uint64_t *output, unsigned int n)
+{
+ uint64_t *counters;
+ unsigned int i;
+ int s;
+
+ counters = (uint64_t *)cm;
+
+ s = splhigh();
+ for (i = 0; i < n; i++)
+ output[i] = counters[i];
+ splx(s);
+}
+
+void
+counters_zero(struct cpumem *cm, unsigned int n)
+{
+ uint64_t *counters;
+ unsigned int i;
+ int s;
+
+ counters = (uint64_t *)cm;
+
+ s = splhigh();
+ for (i = 0; i < n; i++)
+ counters[i] = 0;
+ splx(s);
+}
+
+#endif /* MULTIPROCESSOR */
+
Index: net/pf.c
===================================================================
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.980
diff -u -p -r1.980 pf.c
--- net/pf.c 17 Aug 2016 03:24:11 -0000 1.980
+++ net/pf.c 17 Aug 2016 11:51:55 -0000
@@ -6104,7 +6104,7 @@ pf_check_tcp_cksum(struct mbuf *m, int o
}
/* need to do it in software */
- tcpstat.tcps_inswcsum++;
+ tcpc_inc(tcpc_inswcsum);
switch (af) {
case AF_INET:
@@ -6125,7 +6125,7 @@ pf_check_tcp_cksum(struct mbuf *m, int o
unhandled_af(af);
}
if (sum) {
- tcpstat.tcps_rcvbadsum++;
+ tcpc_inc(tcpc_rcvbadsum);
m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_BAD;
return (1);
}
Index: netinet/ip_output.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_output.c,v
retrieving revision 1.326
diff -u -p -r1.326 ip_output.c
--- netinet/ip_output.c 15 Aug 2016 11:35:25 -0000 1.326
+++ netinet/ip_output.c 17 Aug 2016 11:51:55 -0000
@@ -1800,7 +1800,7 @@ in_proto_cksum_out(struct mbuf *m, struc
if (m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) {
if (!ifp || !(ifp->if_capabilities & IFCAP_CSUM_TCPv4) ||
ip->ip_hl != 5 || ifp->if_bridgeport != NULL) {
- tcpstat.tcps_outswcsum++;
+ tcpc_inc(tcpc_outswcsum);
in_delayed_cksum(m);
m->m_pkthdr.csum_flags &= ~M_TCP_CSUM_OUT; /* Clear */
}
Index: netinet/tcp_input.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_input.c,v
retrieving revision 1.325
diff -u -p -r1.325 tcp_input.c
--- netinet/tcp_input.c 20 Jul 2016 09:15:28 -0000 1.325
+++ netinet/tcp_input.c 17 Aug 2016 11:51:55 -0000
@@ -220,7 +220,7 @@ tcp_reass(struct tcpcb *tp, struct tcphd
if (tiqe == NULL || th->th_seq != tp->rcv_nxt) {
/* Flush segment queue for this connection */
tcp_freeq(tp);
- tcpstat.tcps_rcvmemdrop++;
+ tcpc_inc(tcpc_rcvmemdrop);
m_freem(m);
return (0);
}
@@ -247,8 +247,8 @@ tcp_reass(struct tcpcb *tp, struct tcphd
i = phdr->th_seq + phdr->th_reseqlen - th->th_seq;
if (i > 0) {
if (i >= *tlen) {
- tcpstat.tcps_rcvduppack++;
- tcpstat.tcps_rcvdupbyte += *tlen;
+ tcpc_pkt(tcpc_rcvduppack,
+ tcpc_rcvdupbyte, *tlen);
m_freem(m);
pool_put(&tcpqe_pool, tiqe);
return (0);
@@ -258,8 +258,7 @@ tcp_reass(struct tcpcb *tp, struct tcphd
th->th_seq += i;
}
}
- tcpstat.tcps_rcvoopack++;
- tcpstat.tcps_rcvoobyte += *tlen;
+ tcpc_pkt(tcpc_rcvoopack, tcpc_rcvoobyte, *tlen);
/*
* While we overlap succeeding segments trim them or,
@@ -372,6 +371,8 @@ tcp_input(struct mbuf *m, ...)
int iphlen;
va_list ap;
struct tcphdr *th;
+ struct counters_ref r;
+ uint64_t *tcpc;
#ifdef INET6
struct ip6_hdr *ip6 = NULL;
#endif /* INET6 */
@@ -390,7 +391,7 @@ tcp_input(struct mbuf *m, ...)
iphlen = va_arg(ap, int);
va_end(ap);
- tcpstat.tcps_rcvtotal++;
+ tcpc_inc(tcpc_rcvtotal);
opti.ts_present = 0;
opti.maxseg = 0;
@@ -449,7 +450,7 @@ tcp_input(struct mbuf *m, ...)
IP6_EXTHDR_GET(th, struct tcphdr *, m, iphlen, sizeof(*th));
if (!th) {
- tcpstat.tcps_rcvshort++;
+ tcpc_inc(tcpc_rcvshort);
return;
}
@@ -509,10 +510,10 @@ tcp_input(struct mbuf *m, ...)
int sum;
if (m->m_pkthdr.csum_flags & M_TCP_CSUM_IN_BAD) {
- tcpstat.tcps_rcvbadsum++;
+ tcpc_inc(tcpc_rcvbadsum);
goto drop;
}
- tcpstat.tcps_inswcsum++;
+ tcpc_inc(tcpc_inswcsum);
switch (af) {
case AF_INET:
sum = in4_cksum(m, IPPROTO_TCP, iphlen, tlen);
@@ -525,7 +526,7 @@ tcp_input(struct mbuf *m, ...)
#endif
}
if (sum != 0) {
- tcpstat.tcps_rcvbadsum++;
+ tcpc_inc(tcpc_rcvbadsum);
goto drop;
}
}
@@ -536,14 +537,14 @@ tcp_input(struct mbuf *m, ...)
*/
off = th->th_off << 2;
if (off < sizeof(struct tcphdr) || off > tlen) {
- tcpstat.tcps_rcvbadoff++;
+ tcpc_inc(tcpc_rcvbadoff);
goto drop;
}
tlen -= off;
if (off > sizeof(struct tcphdr)) {
IP6_EXTHDR_GET(th, struct tcphdr *, m, iphlen, off);
if (!th) {
- tcpstat.tcps_rcvshort++;
+ tcpc_inc(tcpc_rcvshort);
return;
}
optlen = off - sizeof(struct tcphdr);
@@ -603,7 +604,7 @@ findpcb:
int inpl_reverse = 0;
if (m->m_pkthdr.pf.flags & PF_TAG_TRANSLATE_LOCALHOST)
inpl_reverse = 1;
- ++tcpstat.tcps_pcbhashmiss;
+ tcpc_inc(tcpc_pcbhashmiss);
switch (af) {
#ifdef INET6
case AF_INET6:
@@ -625,7 +626,7 @@ findpcb:
* but should either do a listen or a connect soon.
*/
if (inp == NULL) {
- ++tcpstat.tcps_noport;
+ tcpc_inc(tcpc_noport);
goto dropwithreset_ratelim;
}
}
@@ -842,14 +843,14 @@ findpcb:
case AF_INET6:
if
(IN6_ARE_ADDR_EQUAL(&ip6->ip6_src,
&ip6->ip6_dst)) {
- tcpstat.tcps_badsyn++;
+ tcpc_inc(tcpc_badsyn);
goto drop;
}
break;
#endif /* INET6 */
case AF_INET:
if (ip->ip_dst.s_addr ==
ip->ip_src.s_addr) {
- tcpstat.tcps_badsyn++;
+ tcpc_inc(tcpc_badsyn);
goto drop;
}
break;
@@ -863,7 +864,7 @@ findpcb:
if (so->so_qlen > so->so_qlimit ||
syn_cache_add(&src.sa, &dst.sa, th, iphlen,
so, m, optp, optlen, &opti, reuse) == -1) {
- tcpstat.tcps_dropsyn++;
+ tcpc_inc(tcpc_dropsyn);
goto drop;
}
return;
@@ -896,7 +897,7 @@ findpcb:
ipsp_spd_lookup(m, af, iphlen, &error, IPSP_DIRECTION_IN,
tdb, inp, 0);
if (error) {
- tcpstat.tcps_rcvnosec++;
+ tcpc_inc(tcpc_rcvnosec);
goto drop;
}
#endif /* IPSEC */
@@ -942,7 +943,7 @@ findpcb:
/* if congestion experienced, set ECE bit in subsequent packets. */
if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) {
tp->t_flags |= TF_RCVD_CE;
- tcpstat.tcps_ecn_rcvce++;
+ tcpc_inc(tcpc_ecn_rcvce);
}
#endif
/*
@@ -988,7 +989,6 @@ findpcb:
/*
* this is a pure ack for outstanding data.
*/
- ++tcpstat.tcps_predack;
if (opti.ts_present && opti.ts_ecr)
tcp_xmit_timer(tp, tcp_now -
opti.ts_ecr);
else if (tp->t_rtttime &&
@@ -996,8 +996,11 @@ findpcb:
tcp_xmit_timer(tp,
tcp_now - tp->t_rtttime);
acked = th->th_ack - tp->snd_una;
- tcpstat.tcps_rcvackpack++;
- tcpstat.tcps_rcvackbyte += acked;
+ tcpc = counters_enter(&r, tcpcounters);
+ tcpc[tcpc_predack]++;
+ tcpc[tcpc_rcvackpack]++;
+ tcpc[tcpc_rcvackbyte] += acked;
+ counters_leave(&r, tcpcounters);
ND6_HINT(tp);
sbdrop(&so->so_snd, acked);
@@ -1074,10 +1077,12 @@ findpcb:
if (tp->sack_enable && tp->rcv_numsacks)
tcp_clean_sackreport(tp);
#endif /* TCP_SACK */
- ++tcpstat.tcps_preddat;
tp->rcv_nxt += tlen;
- tcpstat.tcps_rcvpack++;
- tcpstat.tcps_rcvbyte += tlen;
+ tcpc = counters_enter(&r, tcpcounters);
+ tcpc[tcpc_preddat]++;
+ tcpc[tcpc_rcvpack]++;
+ tcpc[tcpc_rcvbyte] += tlen;
+ counters_leave(&r, tcpcounters);
ND6_HINT(tp);
TCP_SETUP_ACK(tp, tiflags, m);
@@ -1144,7 +1149,7 @@ findpcb:
case TCPS_SYN_RECEIVED:
if (tiflags & TH_ACK) {
if (tiflags & TH_SYN) {
- tcpstat.tcps_badsyn++;
+ tcpc_inc(tcpc_badsyn);
goto dropwithreset;
}
if (SEQ_LEQ(th->th_ack, tp->snd_una) ||
@@ -1216,13 +1221,13 @@ findpcb:
case TH_ECE|TH_CWR:
tp->t_flags |= TF_ECN_PERMIT;
tiflags &= ~(TH_ECE|TH_CWR);
- tcpstat.tcps_ecn_accepts++;
+ tcpc_inc(tcpc_ecn_accepts);
}
}
#endif
if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) {
- tcpstat.tcps_connects++;
+ tcpc_inc(tcpc_connects);
soisconnected(so);
tp->t_state = TCPS_ESTABLISHED;
TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
@@ -1266,8 +1271,8 @@ trimthenstep6:
m_adj(m, -todrop);
tlen = tp->rcv_wnd;
tiflags &= ~TH_FIN;
- tcpstat.tcps_rcvpackafterwin++;
- tcpstat.tcps_rcvbyteafterwin += todrop;
+ tcpc_pkt(tcpc_rcvpackafterwin,
+ tcpc_rcvbyteafterwin, todrop);
}
tp->snd_wl1 = th->th_seq - 1;
tp->rcv_up = th->th_seq;
@@ -1333,9 +1338,11 @@ trimthenstep6:
*/
tp->ts_recent = 0;
} else {
- tcpstat.tcps_rcvduppack++;
- tcpstat.tcps_rcvdupbyte += tlen;
- tcpstat.tcps_pawsdrop++;
+ tcpc = counters_enter(&r, tcpcounters);
+ tcpc[tcpc_pawsdrop]++;
+ tcpc[tcpc_rcvduppack]++;
+ tcpc[tcpc_rcvdupbyte] += tlen;
+ counters_leave(&r, tcpcounters);
goto dropafterack;
}
}
@@ -1364,11 +1371,12 @@ trimthenstep6:
* but keep on processing for RST or ACK.
*/
tp->t_flags |= TF_ACKNOW;
- tcpstat.tcps_rcvdupbyte += todrop = tlen;
- tcpstat.tcps_rcvduppack++;
+ todrop = tlen;
+ tcpc_pkt(tcpc_rcvduppack,
+ tcpc_rcvdupbyte, todrop);
} else {
- tcpstat.tcps_rcvpartduppack++;
- tcpstat.tcps_rcvpartdupbyte += todrop;
+ tcpc_pkt(tcpc_rcvpartduppack,
+ tcpc_rcvpartdupbyte, todrop);
}
hdroptlen += todrop; /* drop from head afterwards */
th->th_seq += todrop;
@@ -1388,7 +1396,7 @@ trimthenstep6:
if ((so->so_state & SS_NOFDREF) &&
tp->t_state > TCPS_CLOSE_WAIT && tlen) {
tp = tcp_close(tp);
- tcpstat.tcps_rcvafterclose++;
+ tcpc_inc(tcpc_rcvafterclose);
goto dropwithreset;
}
@@ -1398,9 +1406,10 @@ trimthenstep6:
*/
todrop = (th->th_seq + tlen) - (tp->rcv_nxt+tp->rcv_wnd);
if (todrop > 0) {
- tcpstat.tcps_rcvpackafterwin++;
+ tcpc = counters_enter(&r, tcpcounters);
+ tcpc[tcpc_rcvpackafterwin]++;
if (todrop >= tlen) {
- tcpstat.tcps_rcvbyteafterwin += tlen;
+ tcpc[tcpc_rcvbyteafterwin] += tlen;
/*
* If window is closed can only take segments at
* window edge, and have to drop data and PUSH from
@@ -1410,11 +1419,14 @@ trimthenstep6:
*/
if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) {
tp->t_flags |= TF_ACKNOW;
- tcpstat.tcps_rcvwinprobe++;
- } else
+ tcpc[tcpc_rcvwinprobe]++;
+ } else {
+ counters_leave(&r, tcpcounters);
goto dropafterack;
+ }
} else
- tcpstat.tcps_rcvbyteafterwin += todrop;
+ tcpc[tcpc_rcvbyteafterwin] += todrop;
+ counters_leave(&r, tcpcounters);
m_adj(m, -todrop);
tlen -= todrop;
tiflags &= ~(TH_PUSH|TH_FIN);
@@ -1468,7 +1480,7 @@ trimthenstep6:
so->so_error = ECONNRESET;
close:
tp->t_state = TCPS_CLOSED;
- tcpstat.tcps_drops++;
+ tcpc_inc(tcpc_drops);
tp = tcp_close(tp);
goto drop;
case TCPS_CLOSING:
@@ -1507,7 +1519,7 @@ trimthenstep6:
* The ACK was checked above.
*/
case TCPS_SYN_RECEIVED:
- tcpstat.tcps_connects++;
+ tcpc_inc(tcpc_connects);
soisconnected(so);
tp->t_state = TCPS_ESTABLISHED;
TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
@@ -1555,10 +1567,10 @@ trimthenstep6:
tp->snd_cwnd = tp->snd_ssthresh;
tp->snd_last = tp->snd_max;
tp->t_flags |= TF_SEND_CWR;
- tcpstat.tcps_cwr_ecn++;
+ tcpc_inc(tcpc_cwr_ecn);
}
}
- tcpstat.tcps_ecn_rcvece++;
+ tcpc_inc(tcpc_ecn_rcvece);
}
/*
* if we receive CWR, we know that the peer has reduced
@@ -1566,7 +1578,7 @@ trimthenstep6:
*/
if ((tiflags & TH_CWR)) {
tp->t_flags &= ~TF_RCVD_CE;
- tcpstat.tcps_ecn_rcvcwr++;
+ tcpc_inc(tcpc_ecn_rcvcwr);
}
#endif /* TCP_ECN */
@@ -1588,7 +1600,7 @@ trimthenstep6:
if (th->th_seq != tp->rcv_nxt &&
SEQ_LT(th->th_ack,
tp->snd_una - tp->max_sndwnd)) {
- tcpstat.tcps_rcvacktooold++;
+ tcpc_inc(tcpc_rcvacktooold);
goto drop;
}
break;
@@ -1604,7 +1616,7 @@ trimthenstep6:
break;
}
if (tiwin == tp->snd_wnd) {
- tcpstat.tcps_rcvdupack++;
+ tcpc_inc(tcpc_rcvdupack);
/*
* If we have outstanding data (other than
* a window probe), this is a completely
@@ -1669,8 +1681,12 @@ trimthenstep6:
#ifdef TCP_ECN
tp->t_flags |= TF_SEND_CWR;
#endif
- tcpstat.tcps_cwr_frecovery++;
- tcpstat.tcps_sack_recovery_episode++;
+ tcpc = counters_enter(&r,
+ tcpcounters);
+ tcpc[tcpc_cwr_frecovery]++;
+ tcpc[tcpc_sack_recovery_episode]++;
+ counters_leave(&r,
+ tcpcounters);
#if defined(TCP_SACK) && defined(TCP_FACK)
tp->t_dupacks = tcprexmtthresh;
(void) tcp_output(tp);
@@ -1698,8 +1714,10 @@ trimthenstep6:
#ifdef TCP_ECN
tp->t_flags |= TF_SEND_CWR;
#endif
- tcpstat.tcps_cwr_frecovery++;
- tcpstat.tcps_sndrexmitfast++;
+ tcpc = counters_enter(&r, tcpcounters);
+ tcpc[tcpc_cwr_frecovery]++;
+ tcpc[tcpc_sndrexmitfast]++;
+ counters_leave(&r, tcpcounters);
(void) tcp_output(tp);
tp->snd_cwnd = tp->snd_ssthresh +
@@ -1788,12 +1806,11 @@ trimthenstep6:
tp->t_dupacks = 0;
#endif
if (SEQ_GT(th->th_ack, tp->snd_max)) {
- tcpstat.tcps_rcvacktoomuch++;
+ tcpc_inc(tcpc_rcvacktoomuch);
goto dropafterack_ratelim;
}
acked = th->th_ack - tp->snd_una;
- tcpstat.tcps_rcvackpack++;
- tcpstat.tcps_rcvackbyte += acked;
+ tcpc_pkt(tcpc_rcvackpack, tcpc_rcvackbyte, acked);
/*
* If we have a timestamp reply, update smoothed
@@ -1966,7 +1983,7 @@ step6:
/* keep track of pure window updates */
if (tlen == 0 &&
tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd)
- tcpstat.tcps_rcvwinupd++;
+ tcpc_inc(tcpc_rcvwinupd);
tp->snd_wnd = tiwin;
tp->snd_wl1 = th->th_seq;
tp->snd_wl2 = th->th_ack;
@@ -2052,8 +2069,7 @@ dodata:
/* XXX */
TCP_SETUP_ACK(tp, tiflags, m);
tp->rcv_nxt += tlen;
tiflags = th->th_flags & TH_FIN;
- tcpstat.tcps_rcvpack++;
- tcpstat.tcps_rcvbyte += tlen;
+ tcpc_pkt(tcpc_rcvpack, tcpc_rcvbyte, tlen);
ND6_HINT(tp);
if (so->so_state & SS_CANTRCVMORE)
m_freem(m);
@@ -2165,7 +2181,7 @@ badsyn:
/*
* Received a bad SYN. Increment counters and dropwithreset.
*/
- tcpstat.tcps_badsyn++;
+ tcpc_inc(tcpc_badsyn);
tp = NULL;
goto dropwithreset;
@@ -2392,7 +2408,7 @@ tcp_dooptions(struct tcpcb *tp, u_char *
}
if ((sigp ? TF_SIGNATURE : 0) ^ (tp->t_flags & TF_SIGNATURE)) {
- tcpstat.tcps_rcvbadsig++;
+ tcpc_inc(tcpc_rcvbadsig);
return (-1);
}
@@ -2400,7 +2416,7 @@ tcp_dooptions(struct tcpcb *tp, u_char *
char sig[16];
if (tdb == NULL) {
- tcpstat.tcps_rcvbadsig++;
+ tcpc_inc(tcpc_rcvbadsig);
return (-1);
}
@@ -2408,11 +2424,11 @@ tcp_dooptions(struct tcpcb *tp, u_char *
return (-1);
if (timingsafe_bcmp(sig, sigp, 16)) {
- tcpstat.tcps_rcvbadsig++;
+ tcpc_inc(tcpc_rcvbadsig);
return (-1);
}
- tcpstat.tcps_rcvgoodsig++;
+ tcpc_inc(tcpc_rcvgoodsig);
}
#endif /* TCP_SIGNATURE */
@@ -2550,7 +2566,7 @@ tcp_sack_option(struct tcpcb *tp, struct
/* Note: TCPOLEN_SACK must be 2*sizeof(tcp_seq) */
tmp_cp = cp + 2;
tmp_olen = optlen - 2;
- tcpstat.tcps_sack_rcv_opts++;
+ tcpc_inc(tcpc_sack_rcv_opts);
if (tp->snd_numholes < 0)
tp->snd_numholes = 0;
if (tp->t_maxseg == 0)
@@ -2870,7 +2886,7 @@ tcp_xmit_timer(struct tcpcb *tp, int rtt
else if (rtt > TCP_RTT_MAX)
rtt = TCP_RTT_MAX;
- tcpstat.tcps_rttupdated++;
+ tcpc_inc(tcpc_rttupdated);
if (tp->t_srtt != 0) {
/*
* delta is fixed point with 2 (TCP_RTT_BASE_SHIFT) bits
@@ -3272,6 +3288,7 @@ int tcp_syn_bucket_limit = 3*TCP_SYN_BUC
int tcp_syn_use_limit = 100000;
struct syn_cache_set tcp_syn_cache[2];
+struct tcpscstat syn_cache_stat;
int tcp_syn_cache_active;
#define SYN_HASH(sa, sp, dp, rand) \
@@ -3412,7 +3429,7 @@ syn_cache_insert(struct syn_cache *sc, s
}
}
arc4random_buf(set->scs_random, sizeof(set->scs_random));
- tcpstat.tcps_sc_seedrandom++;
+ syn_cache_stat.tcpsc_seedrandom++;
}
SYN_HASHALL(sc->sc_hash, &sc->sc_src.sa, &sc->sc_dst.sa,
@@ -3425,7 +3442,7 @@ syn_cache_insert(struct syn_cache *sc, s
* limit or the total cache size limit.
*/
if (scp->sch_length >= tcp_syn_bucket_limit) {
- tcpstat.tcps_sc_bucketoverflow++;
+ syn_cache_stat.tcpsc_bucketoverflow++;
/*
* Someone might attack our bucket hash function. Reseed
* with random as soon as the passive syn cache gets empty.
@@ -3449,7 +3466,7 @@ syn_cache_insert(struct syn_cache *sc, s
} else if (set->scs_count >= tcp_syn_cache_limit) {
struct syn_cache_head *scp2, *sce;
- tcpstat.tcps_sc_overflowed++;
+ syn_cache_stat.tcpsc_overflowed++;
/*
* The cache is full. Toss the oldest entry in the
* first non-empty bucket we can find.
@@ -3499,7 +3516,7 @@ syn_cache_insert(struct syn_cache *sc, s
set->scs_count++;
set->scs_use--;
- tcpstat.tcps_sc_added++;
+ syn_cache_stat.tcpsc_added++;
/*
* If the active cache has exceeded its use limit and
@@ -3543,7 +3560,7 @@ syn_cache_timer(void *arg)
if (sc->sc_rxttot >= tcptv_keep_init)
goto dropit;
- tcpstat.tcps_sc_retransmitted++;
+ syn_cache_stat.tcpsc_retransmitted++;
(void) syn_cache_respond(sc, NULL);
/* Advance the timer back-off. */
@@ -3554,7 +3571,7 @@ syn_cache_timer(void *arg)
return;
dropit:
- tcpstat.tcps_sc_timed_out++;
+ syn_cache_stat.tcpsc_timed_out++;
syn_cache_rm(sc);
syn_cache_put(sc);
splx(s);
@@ -3835,7 +3852,7 @@ syn_cache_get(struct sockaddr *src, stru
#ifdef TCP_ECN
if (sc->sc_flags & SCF_ECN_PERMIT) {
tp->t_flags |= TF_ECN_PERMIT;
- tcpstat.tcps_ecn_accepts++;
+ tcpc_inc(tcpc_ecn_accepts);
}
#endif
#ifdef TCP_SACK
@@ -3850,7 +3867,7 @@ syn_cache_get(struct sockaddr *src, stru
tp->t_state = TCPS_SYN_RECEIVED;
tp->t_rcvtime = tcp_now;
TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init);
- tcpstat.tcps_accepts++;
+ tcpc_inc(tcpc_accepts);
tcp_mss(tp, sc->sc_peermaxseg); /* sets t_maxseg */
if (sc->sc_peermaxseg)
@@ -3872,7 +3889,7 @@ syn_cache_get(struct sockaddr *src, stru
tp->rcv_adv = tp->rcv_nxt + sc->sc_win;
tp->last_ack_sent = tp->rcv_nxt;
- tcpstat.tcps_sc_completed++;
+ syn_cache_stat.tcpsc_completed++;
syn_cache_put(sc);
return (so);
@@ -3884,7 +3901,7 @@ abort:
if (so != NULL)
(void) soabort(so);
syn_cache_put(sc);
- tcpstat.tcps_sc_aborted++;
+ syn_cache_stat.tcpsc_aborted++;
return ((struct socket *)(-1));
}
@@ -3913,7 +3930,7 @@ syn_cache_reset(struct sockaddr *src, st
}
syn_cache_rm(sc);
splx(s);
- tcpstat.tcps_sc_reset++;
+ syn_cache_stat.tcpsc_reset++;
syn_cache_put(sc);
}
@@ -3952,7 +3969,7 @@ syn_cache_unreach(struct sockaddr *src,
syn_cache_rm(sc);
splx(s);
- tcpstat.tcps_sc_unreach++;
+ syn_cache_stat.tcpsc_unreach++;
syn_cache_put(sc);
}
@@ -3980,6 +3997,8 @@ syn_cache_add(struct sockaddr *src, stru
struct syn_cache *sc;
struct syn_cache_head *scp;
struct mbuf *ipopts;
+ struct counters_ref r;
+ uint64_t *tcpc;
tp = sototcpcb(so);
@@ -4035,7 +4054,7 @@ syn_cache_add(struct sockaddr *src, stru
*/
if ((sc = syn_cache_lookup(src, dst, &scp, sotoinpcb(so)->inp_rtableid))
!= NULL) {
- tcpstat.tcps_sc_dupesyn++;
+ syn_cache_stat.tcpsc_dupesyn++;
if (ipopts) {
/*
* If we were remembering a previous source route,
@@ -4047,8 +4066,10 @@ syn_cache_add(struct sockaddr *src, stru
}
sc->sc_timestamp = tb.ts_recent;
if (syn_cache_respond(sc, m) == 0) {
- tcpstat.tcps_sndacks++;
- tcpstat.tcps_sndtotal++;
+ tcpc = counters_enter(&r, tcpcounters);
+ tcpc[tcpc_sndacks]++;
+ tcpc[tcpc_sndtotal]++;
+ counters_leave(&r, tcpcounters);
}
return (0);
}
@@ -4135,11 +4156,13 @@ syn_cache_add(struct sockaddr *src, stru
sc->sc_tp = tp;
if (syn_cache_respond(sc, m) == 0) {
syn_cache_insert(sc, tp);
- tcpstat.tcps_sndacks++;
- tcpstat.tcps_sndtotal++;
+ tcpc = counters_enter(&r, tcpcounters);
+ tcpc[tcpc_sndacks]++;
+ tcpc[tcpc_sndtotal]++;
+ counters_leave(&r, tcpcounters);
} else {
syn_cache_put(sc);
- tcpstat.tcps_sc_dropped++;
+ syn_cache_stat.tcpsc_dropped++;
}
return (0);
Index: netinet/tcp_output.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_output.c,v
retrieving revision 1.118
diff -u -p -r1.118 tcp_output.c
--- netinet/tcp_output.c 19 Jul 2016 21:28:43 -0000 1.118
+++ netinet/tcp_output.c 17 Aug 2016 11:51:55 -0000
@@ -211,6 +211,8 @@ tcp_output(struct tcpcb *tp)
u_char *opt = (u_char *)optbuf;
unsigned int optlen, hdrlen, packetlen;
int idle, sendalot = 0;
+ struct counters_ref r;
+ uint64_t *tcpc;
#ifdef TCP_SACK
int i, sack_rxmit = 0;
struct sackhole *p;
@@ -641,7 +643,7 @@ send:
int count = 0; /* actual number of SACKs inserted */
int maxsack = (MAX_TCPOPTLEN - (optlen + 4))/TCPOLEN_SACK;
- tcpstat.tcps_sack_snd_opts++;
+ tcpc_inc(tcpc_sack_snd_opts);
maxsack = min(maxsack, TCP_MAX_SACK);
for (i = 0; (i < tp->rcv_numsacks && count < maxsack); i++) {
struct sackblk sack = tp->sackblks[i];
@@ -684,15 +686,17 @@ send:
* the template for sends on this connection.
*/
if (len) {
+ tcpc = counters_enter(&r, tcpcounters);
if (tp->t_force && len == 1)
- tcpstat.tcps_sndprobe++;
+ tcpc[tcpc_sndprobe]++;
else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) {
- tcpstat.tcps_sndrexmitpack++;
- tcpstat.tcps_sndrexmitbyte += len;
+ tcpc[tcpc_sndrexmitpack]++;
+ tcpc[tcpc_sndrexmitbyte] += len;
} else {
- tcpstat.tcps_sndpack++;
- tcpstat.tcps_sndbyte += len;
+ tcpc[tcpc_sndpack]++;
+ tcpc[tcpc_sndbyte] += len;
}
+ counters_leave(&r, tcpcounters);
#ifdef notyet
if ((m = m_copypack(so->so_snd.sb_mb, off,
(int)len, max_linkhdr + hdrlen)) == 0) {
@@ -745,14 +749,16 @@ send:
if (off + len == so->so_snd.sb_cc && !soissending(so))
flags |= TH_PUSH;
} else {
+ tcpc = counters_enter(&r, tcpcounters);
if (tp->t_flags & TF_ACKNOW)
- tcpstat.tcps_sndacks++;
+ tcpc[tcpc_sndacks]++;
else if (flags & (TH_SYN|TH_FIN|TH_RST))
- tcpstat.tcps_sndctrl++;
+ tcpc[tcpc_sndctrl]++;
else if (SEQ_GT(tp->snd_up, tp->snd_una))
- tcpstat.tcps_sndurg++;
+ tcpc[tcpc_sndurg]++;
else
- tcpstat.tcps_sndwinup++;
+ tcpc[tcpc_sndwinup]++;
+ counters_leave(&r, tcpcounters);
MGETHDR(m, M_DONTWAIT, MT_HEADER);
if (m != NULL && max_linkhdr + hdrlen > MHLEN) {
@@ -823,8 +829,7 @@ send:
#if defined(TCP_SACK) && defined(TCP_FACK)
tp->retran_data += len;
#endif /* TCP_FACK */
- tcpstat.tcps_sack_rexmits++;
- tcpstat.tcps_sack_rexmit_bytes += len;
+ tcpc_pkt(tcpc_sack_rexmits, tcpc_sack_rexmit_bytes, len);
}
#endif /* TCP_SACK */
@@ -841,7 +846,7 @@ send:
*/
if (tp->t_flags & TF_RCVD_CE) {
flags |= TH_ECE;
- tcpstat.tcps_ecn_sndece++;
+ tcpc_inc(tcpc_ecn_sndece);
}
if (!(tp->t_flags & TF_DISABLE_ECN)) {
/*
@@ -862,7 +867,7 @@ send:
(tp->t_flags & TF_SEND_CWR)) {
flags |= TH_CWR;
tp->t_flags &= ~TF_SEND_CWR;
- tcpstat.tcps_ecn_sndcwr++;
+ tcpc_inc(tcpc_ecn_sndcwr);
}
}
#endif
@@ -982,7 +987,7 @@ send:
if (tp->t_rtttime == 0) {
tp->t_rtttime = tcp_now;
tp->t_rtseq = startseq;
- tcpstat.tcps_segstimed++;
+ tcpc_inc(tcpc_segstimed);
}
}
@@ -1073,7 +1078,7 @@ send:
/* don't set ECT */
} else {
needect = 1;
- tcpstat.tcps_ecn_sndect++;
+ tcpc_inc(tcpc_ecn_sndect);
}
}
#endif
@@ -1172,9 +1177,11 @@ out:
if (packetlen > tp->t_pmtud_mtu_sent)
tp->t_pmtud_mtu_sent = packetlen;
- tcpstat.tcps_sndtotal++;
+ tcpc = counters_enter(&r, tcpcounters);
+ tcpc[tcpc_sndtotal]++;
if (tp->t_flags & TF_DELACK)
- tcpstat.tcps_delack++;
+ tcpc[tcpc_delack]++;
+ counters_leave(&r, tcpcounters);
/*
* Data sent (as far as we can tell).
Index: netinet/tcp_subr.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_subr.c,v
retrieving revision 1.151
diff -u -p -r1.151 tcp_subr.c
--- netinet/tcp_subr.c 7 Mar 2016 18:44:00 -0000 1.151
+++ netinet/tcp_subr.c 17 Aug 2016 11:51:55 -0000
@@ -131,7 +131,7 @@ struct pool tcpqe_pool;
struct pool sackhl_pool;
#endif
-struct tcpstat tcpstat; /* tcp statistics */
+struct cpumem *tcpcounters; /* tcp statistics */
tcp_seq tcp_iss;
/*
@@ -140,6 +140,7 @@ tcp_seq tcp_iss;
void
tcp_init(void)
{
+ tcpcounters = counters_alloc(tcpc_ncounters, M_PCB);
tcp_iss = 1; /* wrong */
pool_init(&tcpcb_pool, sizeof(struct tcpcb), 0, 0, 0, "tcpcb", NULL);
pool_init(&tcpqe_pool, sizeof(struct tcpqent), 0, 0, 0, "tcpqe", NULL);
@@ -498,9 +499,9 @@ tcp_drop(tp, errno)
if (TCPS_HAVERCVDSYN(tp->t_state)) {
tp->t_state = TCPS_CLOSED;
(void) tcp_output(tp);
- tcpstat.tcps_drops++;
+ tcpc_inc(tcpc_drops);
} else
- tcpstat.tcps_conndrops++;
+ tcpc_inc(tcpc_conndrops);
if (errno == ETIMEDOUT && tp->t_softerror)
errno = tp->t_softerror;
so->so_error = errno;
@@ -559,7 +560,7 @@ tcp_reaper(void *arg)
s = splsoftnet();
pool_put(&tcpcb_pool, tp);
splx(s);
- tcpstat.tcps_closed++;
+ tcpc_inc(tcpc_closed);
}
int
Index: netinet/tcp_timer.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_timer.c,v
retrieving revision 1.49
diff -u -p -r1.49 tcp_timer.c
--- netinet/tcp_timer.c 7 Mar 2016 18:44:00 -0000 1.49
+++ netinet/tcp_timer.c 17 Aug 2016 11:51:55 -0000
@@ -234,12 +234,12 @@ tcp_timer_rexmt(void *arg)
#endif
if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
tp->t_rxtshift = TCP_MAXRXTSHIFT;
- tcpstat.tcps_timeoutdrop++;
+ tcpc_inc(tcpc_timeoutdrop);
(void)tcp_drop(tp, tp->t_softerror ?
tp->t_softerror : ETIMEDOUT);
goto out;
}
- tcpstat.tcps_rexmttimeo++;
+ tcpc_inc(tcpc_rexmttimeo);
rto = TCP_REXMTVAL(tp);
if (rto < tp->t_rttmin)
rto = tp->t_rttmin;
@@ -371,7 +371,7 @@ tcp_timer_rexmt(void *arg)
tp->t_flags |= TF_SEND_CWR;
#endif
#if 1 /* TCP_ECN */
- tcpstat.tcps_cwr_timeout++;
+ tcpc_inc(tcpc_cwr_timeout);
#endif
}
(void) tcp_output(tp);
@@ -393,7 +393,7 @@ tcp_timer_persist(void *arg)
splx(s);
return;
}
- tcpstat.tcps_persisttimeo++;
+ tcpc_inc(tcpc_persisttimeo);
/*
* Hack: if the peer is dead/unreachable, we do not
* time out if the window is closed. After a full
@@ -407,7 +407,7 @@ tcp_timer_persist(void *arg)
if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
((tcp_now - tp->t_rcvtime) >= tcp_maxpersistidle ||
(tcp_now - tp->t_rcvtime) >= rto * tcp_totbackoff)) {
- tcpstat.tcps_persistdrop++;
+ tcpc_inc(tcpc_persistdrop);
tp = tcp_drop(tp, ETIMEDOUT);
goto out;
}
@@ -431,7 +431,7 @@ tcp_timer_keep(void *arg)
return;
}
- tcpstat.tcps_keeptimeo++;
+ tcpc_inc(tcpc_keeptimeo);
if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
goto dropit;
if ((tcp_always_keepalive ||
@@ -452,7 +452,7 @@ tcp_timer_keep(void *arg)
* by the protocol spec, this requires the
* correspondent TCP to respond.
*/
- tcpstat.tcps_keepprobe++;
+ tcpc_inc(tcpc_keepprobe);
tcp_respond(tp, mtod(tp->t_template, caddr_t),
NULL, tp->rcv_nxt, tp->snd_una - 1, 0, 0);
TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl);
@@ -463,7 +463,7 @@ tcp_timer_keep(void *arg)
return;
dropit:
- tcpstat.tcps_keepdrops++;
+ tcpc_inc(tcpc_keepdrops);
tp = tcp_drop(tp, ETIMEDOUT);
splx(s);
Index: netinet/tcp_usrreq.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.134
diff -u -p -r1.134 tcp_usrreq.c
--- netinet/tcp_usrreq.c 20 Jul 2016 19:57:53 -0000 1.134
+++ netinet/tcp_usrreq.c 17 Aug 2016 11:51:55 -0000
@@ -116,6 +116,7 @@ int *tcpctl_vars[TCPCTL_MAXID] = TCPCTL_
struct inpcbtable tcbtable;
int tcp_ident(void *, size_t *, void *, size_t, int);
+int tcp_stats(void *, size_t *, void *, size_t);
/*
* Process a TCP user request for TCP tb. If this is a send request
@@ -288,7 +289,7 @@ tcp_usrreq(so, req, m, nam, control, p)
tcp_rscale(tp, sb_max);
soisconnecting(so);
- tcpstat.tcps_connattempt++;
+ tcpc_inc(tcpc_connattempt);
tp->t_state = TCPS_SYN_SENT;
TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init);
tcp_set_iss_tsm(tp);
@@ -825,7 +826,7 @@ tcp_ident(void *oldp, size_t *oldlenp, v
}
if (inp == NULL) {
- ++tcpstat.tcps_pcbhashmiss;
+ tcpc_inc(tcpc_pcbhashmiss);
switch (tir.faddr.ss_family) {
#ifdef INET6
case AF_INET6:
@@ -934,28 +935,7 @@ tcp_sysctl(name, namelen, oldp, oldlenp,
#endif
case TCPCTL_STATS:
- if (newp != NULL)
- return (EPERM);
- {
- struct syn_cache_set *set;
- int i;
-
- set = &tcp_syn_cache[tcp_syn_cache_active];
- tcpstat.tcps_sc_hash_size = set->scs_size;
- tcpstat.tcps_sc_entry_count = set->scs_count;
- tcpstat.tcps_sc_entry_limit = tcp_syn_cache_limit;
- tcpstat.tcps_sc_bucket_maxlen = 0;
- for (i = 0; i < set->scs_size; i++) {
- if (tcpstat.tcps_sc_bucket_maxlen <
- set->scs_buckethead[i].sch_length)
- tcpstat.tcps_sc_bucket_maxlen =
- set->scs_buckethead[i].sch_length;
- }
- tcpstat.tcps_sc_bucket_limit = tcp_syn_bucket_limit;
- tcpstat.tcps_sc_uses_left = set->scs_use;
- }
- return (sysctl_struct(oldp, oldlenp, newp, newlen,
- &tcpstat, sizeof(tcpstat)));
+ return tcp_stats(oldp, oldlenp, newp, newlen);
case TCPCTL_SYN_USE_LIMIT:
error = sysctl_int(oldp, oldlenp, newp, newlen,
@@ -1002,6 +982,152 @@ tcp_sysctl(name, namelen, oldp, oldlenp,
return (ENOPROTOOPT);
}
/* NOTREACHED */
+}
+
+int
+tcp_stats(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
+{
+ struct tcpstat tcpstat;
+ uint64_t tcpc[tcpc_ncounters];
+ extern struct tcpscstat syn_cache_stat;
+ struct syn_cache_set *set;
+ int i;
+
+ if (newp != NULL)
+ return (EPERM);
+
+ counters_read(tcpcounters, tcpc, nitems(tcpc));
+
+#define TCP_C2S(n) tcpstat.tcps_##n = tcpc[tcpc_##n]
+#define TCP_SC2S(n) tcpstat.tcps_sc_##n = syn_cache_stat.tcpsc_##n
+
+ TCP_C2S(connattempt);
+ TCP_C2S(accepts);
+ TCP_C2S(connects);
+ TCP_C2S(drops);
+ TCP_C2S(conndrops);
+ TCP_C2S(closed);
+ TCP_C2S(segstimed);
+ TCP_C2S(rttupdated);
+ TCP_C2S(delack);
+ TCP_C2S(timeoutdrop);
+ TCP_C2S(rexmttimeo);
+ TCP_C2S(persisttimeo);
+ TCP_C2S(persistdrop);
+ TCP_C2S(keeptimeo);
+ TCP_C2S(keepprobe);
+ TCP_C2S(keepdrops);
+
+ TCP_C2S(sndtotal);
+ TCP_C2S(sndpack);
+ TCP_C2S(sndbyte);
+ TCP_C2S(sndrexmitpack);
+ TCP_C2S(sndrexmitbyte);
+ TCP_C2S(sndrexmitfast);
+ TCP_C2S(sndacks);
+ TCP_C2S(sndprobe);
+ TCP_C2S(sndurg);
+ TCP_C2S(sndwinup);
+ TCP_C2S(sndctrl);
+
+ TCP_C2S(rcvtotal);
+ TCP_C2S(rcvpack);
+ TCP_C2S(rcvbyte);
+ TCP_C2S(rcvbadsum);
+ TCP_C2S(rcvbadoff);
+ TCP_C2S(rcvmemdrop);
+ TCP_C2S(rcvnosec);
+ TCP_C2S(rcvshort);
+ TCP_C2S(rcvduppack);
+ TCP_C2S(rcvdupbyte);
+ TCP_C2S(rcvpartduppack);
+ TCP_C2S(rcvpartdupbyte);
+ TCP_C2S(rcvoopack);
+ TCP_C2S(rcvoobyte);
+ TCP_C2S(rcvpackafterwin);
+ TCP_C2S(rcvbyteafterwin);
+ TCP_C2S(rcvafterclose);
+ TCP_C2S(rcvwinprobe);
+ TCP_C2S(rcvdupack);
+ TCP_C2S(rcvacktoomuch);
+ TCP_C2S(rcvacktooold);
+ TCP_C2S(rcvackpack);
+ TCP_C2S(rcvackbyte);
+ TCP_C2S(rcvwinupd);
+ TCP_C2S(pawsdrop);
+ TCP_C2S(predack);
+ TCP_C2S(preddat);
+
+ TCP_C2S(pcbhashmiss);
+ TCP_C2S(noport);
+ TCP_C2S(badsyn);
+ TCP_C2S(dropsyn);
+
+ TCP_C2S(rcvbadsig);
+ TCP_C2S(rcvgoodsig);
+ TCP_C2S(inswcsum);
+ TCP_C2S(outswcsum);
+
+ /* ECN stats */
+ TCP_C2S(ecn_accepts);
+ TCP_C2S(ecn_rcvece);
+ TCP_C2S(ecn_rcvcwr);
+ TCP_C2S(ecn_rcvce);
+ TCP_C2S(ecn_sndect);
+ TCP_C2S(ecn_sndece);
+ TCP_C2S(ecn_sndcwr);
+ TCP_C2S(cwr_ecn);
+ TCP_C2S(cwr_frecovery);
+ TCP_C2S(cwr_timeout);
+
+ /* These statistics deal with the SYN cache. */
+ TCP_SC2S(added);
+ TCP_SC2S(completed);
+ TCP_SC2S(timed_out);
+ TCP_SC2S(overflowed);
+ TCP_SC2S(reset);
+ TCP_SC2S(unreach);
+ TCP_SC2S(bucketoverflow);
+ TCP_SC2S(aborted);
+ TCP_SC2S(dupesyn);
+ TCP_SC2S(dropped);
+ TCP_SC2S(collisions);
+ TCP_SC2S(retransmitted);
+ TCP_SC2S(seedrandom);
+ TCP_SC2S(hash_size);
+ TCP_SC2S(entry_count);
+ TCP_SC2S(bucket_maxlen);
+ TCP_SC2S(bucket_limit);
+ TCP_SC2S(uses_left);
+
+ TCP_C2S(conndrained);
+
+ TCP_C2S(sack_recovery_episode);
+ TCP_C2S(sack_rexmits);
+ TCP_C2S(sack_rexmit_bytes);
+ TCP_C2S(sack_rcv_opts);
+ TCP_C2S(sack_snd_opts);
+
+#undef TCP_C2S
+#undef TCP_SC2S
+
+ set = &tcp_syn_cache[tcp_syn_cache_active];
+ tcpstat.tcps_sc_hash_size = set->scs_size;
+ tcpstat.tcps_sc_entry_count = set->scs_count;
+ tcpstat.tcps_sc_entry_limit = tcp_syn_cache_limit;
+ tcpstat.tcps_sc_bucket_maxlen = 0;
+ for (i = 0; i < set->scs_size; i++) {
+ if (tcpstat.tcps_sc_bucket_maxlen <
+ set->scs_buckethead[i].sch_length) {
+ tcpstat.tcps_sc_bucket_maxlen =
+ set->scs_buckethead[i].sch_length;
+ }
+ }
+ tcpstat.tcps_sc_bucket_limit = tcp_syn_bucket_limit;
+ tcpstat.tcps_sc_uses_left = set->scs_use;
+
+ return (sysctl_struct(oldp, oldlenp, newp, newlen,
+ &tcpstat, sizeof(tcpstat)));
}
/*
Index: netinet/tcp_var.h
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_var.h,v
retrieving revision 1.115
diff -u -p -r1.115 tcp_var.h
--- netinet/tcp_var.h 20 Jul 2016 19:57:53 -0000 1.115
+++ netinet/tcp_var.h 17 Aug 2016 11:51:55 -0000
@@ -360,6 +360,146 @@ struct syn_cache_set {
* Many of these should be kept per connection,
* but that's inconvenient at the moment.
*/
+
+#ifdef _KERNEL
+enum tcpcounters {
+ tcpc_connattempt, /* connections initiated */
+ tcpc_accepts, /* connections accepted */
+ tcpc_connects, /* connections established */
+ tcpc_drops, /* connections dropped */
+ tcpc_conndrops, /* embryonic connections dropped */
+ tcpc_closed, /* conn. closed (includes drops) */
+ tcpc_segstimed, /* segs where we tried to get rtt */
+ tcpc_rttupdated, /* times we succeeded */
+ tcpc_delack, /* delayed acks sent */
+ tcpc_timeoutdrop, /* conn. dropped in rxmt timeout */
+ tcpc_rexmttimeo, /* retransmit timeouts */
+ tcpc_persisttimeo, /* persist timeouts */
+ tcpc_persistdrop, /* connections dropped in persist */
+ tcpc_keeptimeo, /* keepalive timeouts */
+ tcpc_keepprobe, /* keepalive probes sent */
+ tcpc_keepdrops, /* connections dropped in keepalive */
+
+ tcpc_sndtotal, /* total packets sent */
+ tcpc_sndpack, /* data packets sent */
+ tcpc_sndbyte, /* data bytes sent */
+ tcpc_sndrexmitpack, /* data packets retransmitted */
+ tcpc_sndrexmitbyte, /* data bytes retransmitted */
+ tcpc_sndrexmitfast, /* Fast retransmits */
+ tcpc_sndacks, /* ack-only packets sent */
+ tcpc_sndprobe, /* window probes sent */
+ tcpc_sndurg, /* packets sent with URG only */
+ tcpc_sndwinup, /* window update-only packets sent */
+ tcpc_sndctrl, /* control (SYN|FIN|RST) packets sent */
+
+ tcpc_rcvtotal, /* total packets received */
+ tcpc_rcvpack, /* packets received in sequence */
+ tcpc_rcvbyte, /* bytes received in sequence */
+ tcpc_rcvbadsum, /* packets received with ccksum errs */
+ tcpc_rcvbadoff, /* packets received with bad offset */
+ tcpc_rcvmemdrop, /* packets dropped for lack of memory */
+ tcpc_rcvnosec, /* packets dropped for lack of ipsec */
+ tcpc_rcvshort, /* packets received too short */
+ tcpc_rcvduppack, /* duplicate-only packets received */
+ tcpc_rcvdupbyte, /* duplicate-only bytes received */
+ tcpc_rcvpartduppack, /* packets with some duplicate data */
+ tcpc_rcvpartdupbyte, /* dup. bytes in part-dup. packets */
+ tcpc_rcvoopack, /* out-of-order packets received */
+ tcpc_rcvoobyte, /* out-of-order bytes received */
+ tcpc_rcvpackafterwin, /* packets with data after window */
+ tcpc_rcvbyteafterwin, /* bytes rcvd after window */
+ tcpc_rcvafterclose, /* packets rcvd after "close" */
+ tcpc_rcvwinprobe, /* rcvd window probe packets */
+ tcpc_rcvdupack, /* rcvd duplicate acks */
+ tcpc_rcvacktoomuch, /* rcvd acks for unsent data */
+ tcpc_rcvacktooold, /* rcvd acks for old data */
+ tcpc_rcvackpack, /* rcvd ack packets */
+ tcpc_rcvackbyte, /* bytes acked by rcvd acks */
+ tcpc_rcvwinupd, /* rcvd window update packets */
+ tcpc_pawsdrop, /* segments dropped due to PAWS */
+ tcpc_predack, /* times hdr predict ok for acks */
+ tcpc_preddat, /* times hdr predict ok for data pkts */
+
+ tcpc_pcbhashmiss, /* input packets missing pcb hash */
+ tcpc_noport, /* no socket on port */
+ tcpc_badsyn, /* SYN packet with src==dst rcv'ed */
+ tcpc_dropsyn, /* SYN packet dropped */
+
+ tcpc_rcvbadsig, /* rcvd bad/missing TCP signatures */
+ tcpc_rcvgoodsig, /* rcvd good TCP signatures */
+ tcpc_inswcsum, /* input software-checksummed packets */
+ tcpc_outswcsum, /* output software-checksummed packets */
+
+ /* ECN stats */
+ tcpc_ecn_accepts, /* ecn connections accepted */
+ tcpc_ecn_rcvece, /* # of rcvd ece */
+ tcpc_ecn_rcvcwr, /* # of rcvd cwr */
+ tcpc_ecn_rcvce, /* # of rcvd ce in ip header */
+ tcpc_ecn_sndect, /* # of ect sent */
+ tcpc_ecn_sndece, /* # of ece sent */
+ tcpc_ecn_sndcwr, /* # of cwr sent */
+ tcpc_cwr_ecn, /* # of cwnd reduced by ecn */
+ tcpc_cwr_frecovery, /* # of cwnd reduced by fastrecovery */
+ tcpc_cwr_timeout, /* # of cwnd reduced by timeout */
+
+ tcpc_conndrained, /* # of connections drained */
+
+ tcpc_sack_recovery_episode, /* SACK recovery episodes */
+ tcpc_sack_rexmits, /* SACK rexmit segments */
+ tcpc_sack_rexmit_bytes, /* SACK rexmit bytes */
+ tcpc_sack_rcv_opts, /* SACK options received */
+ tcpc_sack_snd_opts, /* SACK options sent */
+
+ tcpc_ncounters
+};
+
+/* These statistics deal with the SYN cache. */
+struct tcpscstat {
+ uint64_t tcpsc_added; /* # of entries added */
+ uint64_t tcpsc_completed; /* # of connections completed */
+ uint64_t tcpsc_timed_out; /* # of entries timed out */
+ uint64_t tcpsc_overflowed; /* # dropped due to overflow */
+ uint64_t tcpsc_reset; /* # dropped due to RST */
+ uint64_t tcpsc_unreach; /* # dropped due to ICMP unreach */
+ uint64_t tcpsc_bucketoverflow; /* # dropped due to bucket overflow */
+ uint64_t tcpsc_aborted; /* # of entries aborted (no mem) */
+ uint64_t tcpsc_dupesyn; /* # of duplicate SYNs received */
+ uint64_t tcpsc_dropped; /* # of SYNs dropped (no route/mem) */
+ uint64_t tcpsc_collisions; /* # of hash collisions */
+ uint64_t tcpsc_retransmitted; /* # of retransmissions */
+ uint64_t tcpsc_seedrandom; /* # of syn cache seeds with random */
+ uint64_t tcpsc_hash_size; /* hash buckets in current syn cache */
+ uint64_t tcpsc_entry_count; /* # of entries in current syn cache */
+ uint64_t tcpsc_entry_limit; /* limit of syn cache entries */
+ uint64_t tcpsc_bucket_maxlen; /* maximum # of entries in any bucket */
+ uint64_t tcpsc_bucket_limit; /* limit of syn cache bucket list */
+ uint64_t tcpsc_uses_left; /* use counter of current syn cache */
+};
+
+#define tcpc_inc(_c) do { \
+ struct counters_ref r; \
+ uint64_t *tcpc = counters_enter(&r, tcpcounters); \
+ tcpc[_c]++; \
+ counters_leave(&r, tcpcounters); \
+} while (0)
+
+#define tcpc_add(_c, _v) do { \
+ struct counters_ref r; \
+ uint64_t *tcpc = counters_enter(&r, tcpcounters); \
+ tcpc[_c] += (_v); \
+ counters_leave(&r, tcpcounters); \
+} while (0)
+
+#define tcpc_pkt(_p, _b, _v) do { \
+ struct counters_ref r; \
+ uint64_t *tcpc = counters_enter(&r, tcpcounters); \
+ tcpc[_p]++; \
+ tcpc[_b] += (_v); \
+ counters_leave(&r, tcpcounters); \
+} while (0)
+
+#endif /* _KERNEL */
+
struct tcpstat {
u_int32_t tcps_connattempt; /* connections initiated */
u_int32_t tcps_accepts; /* connections accepted */
@@ -566,9 +706,10 @@ struct tcp_ident_mapping {
};
#ifdef _KERNEL
+#include <sys/percpu.h>
extern struct inpcbtable tcbtable; /* head of queue of active tcpcb's */
-extern struct tcpstat tcpstat; /* tcp statistics */
-extern u_int32_t tcp_now; /* for RFC 1323 timestamps */
+extern struct cpumem *tcpcounters; /* tcp statistics */
+extern u_int32_t tcp_now; /* for RFC 1323 timestamps */
extern int tcp_do_rfc1323; /* enabled/disabled? */
extern int tcptv_keep_init; /* time to keep alive the initial SYN packet */
extern int tcp_mssdflt; /* default maximum segment size */
Index: netinet6/ip6_output.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_output.c,v
retrieving revision 1.211
diff -u -p -r1.211 ip6_output.c
--- netinet6/ip6_output.c 1 Jul 2016 18:18:57 -0000 1.211
+++ netinet6/ip6_output.c 17 Aug 2016 11:51:55 -0000
@@ -2861,7 +2861,7 @@ in6_proto_cksum_out(struct mbuf *m, stru
if (!ifp || !(ifp->if_capabilities & IFCAP_CSUM_TCPv6) ||
ip6->ip6_nxt != IPPROTO_TCP ||
ifp->if_bridgeport != NULL) {
- tcpstat.tcps_outswcsum++;
+ tcpc_inc(tcpc_outswcsum);
in6_delayed_cksum(m, IPPROTO_TCP);
m->m_pkthdr.csum_flags &= ~M_TCP_CSUM_OUT; /* Clear */
}
Index: sys/percpu.h
===================================================================
RCS file: sys/percpu.h
diff -N sys/percpu.h
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ sys/percpu.h 17 Aug 2016 11:51:55 -0000
@@ -0,0 +1,171 @@
+/* $OpenBSD$ */
+
+/*
+ * Copyright (c) 2016 David Gwynne <[email protected]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _SYS_PERCPU_H_
+#define _SYS_PERCPU_H_
+
+#include <sys/atomic.h>
+
+#ifndef CACHELINESIZE
+#define CACHELINESIZE 64
+#endif
+
+#ifndef __upunused /* this should go in param.h */
+#ifdef MULTIPROCESSOR
+#define __upunused
+#else
+#define __upunused __attribute__((__unused__))
+#endif
+#endif
+
+struct cpumem {
+ void *mem;
+};
+
+struct cpumem_iter {
+ unsigned int cpu;
+} __upunused;
+
+struct counters_ref {
+ uint64_t g;
+ uint64_t *c;
+};
+
+#ifdef _KERNEL
+struct pool;
+
+struct cpumem *cpumem_get(struct pool *);
+void cpumem_put(struct pool *, struct cpumem *);
+
+struct cpumem *cpumem_malloc(size_t, int);
+struct cpumem *cpumem_realloc(struct cpumem *, size_t, int);
+void cpumem_free(struct cpumem *, int, size_t);
+
+#ifdef MULTIPROCESSOR
+static inline void *
+cpumem_enter(struct cpumem *cm)
+{
+ return (cm[cpu_number()].mem);
+}
+
+static inline void
+cpumem_leave(struct cpumem *cm, void *mem)
+{
+ /* KDASSERT? */
+}
+
+void *cpumem_first(struct cpumem_iter *, struct cpumem *);
+void *cpumem_next(struct cpumem_iter *, struct cpumem *);
+
+#define CPUMEM_BOOT_MEMORY(_name, _sz) \
+static struct {						\
+ unsigned char mem[_sz]; \
+ struct cpumem cpumem; \
+} __aligned(CACHELINESIZE) _name##_boot_cpumem = { \
+ .cpumem = { _name##_boot_cpumem.mem } \
+}
+
+#define CPUMEM_BOOT_INITIALIZER(_name) \
+ { &_name##_boot_cpumem.cpumem }
+
+#else /* MULTIPROCESSOR */
+static inline void *
+cpumem_enter(struct cpumem *cm)
+{
+ return (cm);
+}
+
+static inline void
+cpumem_leave(struct cpumem *cm, void *mem)
+{
+ /* KDASSERT? */
+}
+
+static inline void *
+cpumem_first(struct cpumem_iter *i, struct cpumem *cm)
+{
+ return (cm);
+}
+
+static inline void *
+cpumem_next(struct cpumem_iter *i, struct cpumem *cm)
+{
+ return (NULL);
+}
+
+#define CPUMEM_BOOT_MEMORY(_name, _sz) \
+static struct {						\
+ unsigned char mem[_sz]; \
+} _name##_boot_cpumem
+
+#define CPUMEM_BOOT_INITIALIZER(_name)				\
+ { (struct cpumem *)&_name##_boot_cpumem.mem }
+
+#endif /* MULTIPROCESSOR */
+
+#define CPUMEM_FOREACH(_var, _iter, _cpumem) \
+ for ((_var) = cpumem_first((_iter), (_cpumem)); \
+ (_var) != NULL; \
+ (_var) = cpumem_next((_iter), (_cpumem)))
+
+struct cpumem *counters_alloc(unsigned int, int);
+struct cpumem *counters_realloc(struct cpumem *, unsigned int, int);
+void counters_free(struct cpumem *, int, unsigned int);
+void counters_read(struct cpumem *, uint64_t *, unsigned int);
+void counters_zero(struct cpumem *, unsigned int);
+
+#ifdef MULTIPROCESSOR
+static inline uint64_t *
+counters_enter(struct counters_ref *ref, struct cpumem *cm)
+{
+ ref->c = cpumem_enter(cm);
+ ref->g = ++(*ref->c); /* make the generation number odd */
+ return (ref->c + 1);
+}
+
+static inline void
+counters_leave(struct counters_ref *ref, struct cpumem *cm)
+{
+ membar_producer();
+ (*ref->c) = ++ref->g; /* make the generation number even again */
+ cpumem_leave(cm, ref->c);
+}
+#define COUNTERS_BOOT_MEMORY(_name, _n) \
+ CPUMEM_BOOT_MEMORY(_name, ((_n) + 1) * sizeof(uint64_t))
+#else
+static inline uint64_t *
+counters_enter(struct counters_ref *r, struct cpumem *cm)
+{
+ r->c = cpumem_enter(cm);
+ return (r->c);
+}
+
+static inline void
+counters_leave(struct counters_ref *r, struct cpumem *cm)
+{
+ cpumem_leave(cm, r->c);
+}
+
+#define COUNTERS_BOOT_MEMORY(_name, _n)				\
+ CPUMEM_BOOT_MEMORY(_name, (_n) * sizeof(uint64_t))
+#endif
+
+#define COUNTERS_BOOT_INITIALIZER(_name) CPUMEM_BOOT_INITIALIZER(_name)
+
+#endif /* _KERNEL */
+#endif /* _SYS_PERCPU_H_ */