Module Name: src
Committed By: ozaki-r
Date: Thu Feb 2 02:52:10 UTC 2017
Modified Files:
src/sys/net: files.net
src/sys/netinet: ip_carp.c ip_icmp.c
src/sys/netinet6: icmp6.c
src/sys/rump/librump/rumpnet: Makefile.rumpnet
Added Files:
src/sys/netinet: wqinput.c wqinput.h
Log Message:
Defer some pr_input to workqueue
pr_input is currently called in softint. Some pr_input such as ICMP, ICMPv6
and CARP can add/delete/update IP addresses and routing table entries. For
example, icmp6_redirect_input updates a routing table entry and
nd6_ra_input may delete an IP address.
Basically such operations shouldn't be done in softint. That aside, we have
a reason to avoid the situation; psz/psref waits cannot be used in softint,
however they are required to work in such pr_input in the MP-safe world.
The change implements the workqueue pr_input framework called wqinput which
provides a means to defer pr_input of a protocol to workqueue easily.
Currently icmp_input, icmp6_input, carp_proto_input and carp6_proto_input
are deferred to workqueue by the framework.
Proposed and discussed on tech-kern and tech-net
To generate a diff of this commit:
cvs rdiff -u -r1.11 -r1.12 src/sys/net/files.net
cvs rdiff -u -r1.83 -r1.84 src/sys/netinet/ip_carp.c
cvs rdiff -u -r1.155 -r1.156 src/sys/netinet/ip_icmp.c
cvs rdiff -u -r0 -r1.1 src/sys/netinet/wqinput.c src/sys/netinet/wqinput.h
cvs rdiff -u -r1.206 -r1.207 src/sys/netinet6/icmp6.c
cvs rdiff -u -r1.20 -r1.21 src/sys/rump/librump/rumpnet/Makefile.rumpnet
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/net/files.net
diff -u src/sys/net/files.net:1.11 src/sys/net/files.net:1.12
--- src/sys/net/files.net:1.11 Fri Sep 16 03:10:45 2016
+++ src/sys/net/files.net Thu Feb 2 02:52:10 2017
@@ -1,4 +1,4 @@
-# $NetBSD: files.net,v 1.11 2016/09/16 03:10:45 pgoyette Exp $
+# $NetBSD: files.net,v 1.12 2017/02/02 02:52:10 ozaki-r Exp $
# XXX CLEANUP
define net
@@ -62,6 +62,7 @@ file netinet/ip_carp.c carp & (inet | i
file netinet/ip_ecn.c ipsec | gif | stf
file netinet/ip_encap.c inet | inet6
file netinet/ip_etherip.c etherip & inet
+file netinet/wqinput.c inet | inet6
file netinet6/ip6_etherip.c etherip & inet6
file netinet6/in6_gif.c gif & inet6
Index: src/sys/netinet/ip_carp.c
diff -u src/sys/netinet/ip_carp.c:1.83 src/sys/netinet/ip_carp.c:1.84
--- src/sys/netinet/ip_carp.c:1.83 Mon Jan 16 15:44:47 2017
+++ src/sys/netinet/ip_carp.c Thu Feb 2 02:52:10 2017
@@ -1,4 +1,4 @@
-/* $NetBSD: ip_carp.c,v 1.83 2017/01/16 15:44:47 christos Exp $ */
+/* $NetBSD: ip_carp.c,v 1.84 2017/02/02 02:52:10 ozaki-r Exp $ */
/* $OpenBSD: ip_carp.c,v 1.113 2005/11/04 08:11:54 mcbride Exp $ */
/*
@@ -33,7 +33,7 @@
#endif
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ip_carp.c,v 1.83 2017/01/16 15:44:47 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ip_carp.c,v 1.84 2017/02/02 02:52:10 ozaki-r Exp $");
/*
* TODO:
@@ -70,6 +70,7 @@ __KERNEL_RCSID(0, "$NetBSD: ip_carp.c,v
#include <net/netisr.h>
#include <net/net_stats.h>
#include <netinet/if_inarp.h>
+#include <netinet/wqinput.h>
#if NFDDI > 0
#include <net/if_fddi.h>
@@ -234,6 +235,14 @@ static void carp_ether_purgemulti(struct
static void sysctl_net_inet_carp_setup(struct sysctllog **);
+/* workqueue-based pr_input */
+static struct wqinput *carp_wqinput;
+static void _carp_proto_input(struct mbuf *, int, int);
+#ifdef INET6
+static struct wqinput *carp6_wqinput;
+static void _carp6_proto_input(struct mbuf *, int, int);
+#endif
+
struct if_clone carp_cloner =
IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy);
@@ -468,19 +477,15 @@ carp_setroute(struct carp_softc *sc, int
* we have rearranged checks order compared to the rfc,
* but it seems more efficient this way or not possible otherwise.
*/
-void
-carp_proto_input(struct mbuf *m, ...)
+static void
+_carp_proto_input(struct mbuf *m, int hlen, int proto)
{
struct ip *ip = mtod(m, struct ip *);
struct carp_softc *sc = NULL;
struct carp_header *ch;
int iplen, len;
- va_list ap;
struct ifnet *rcvif;
- va_start(ap, m);
- va_end(ap);
-
CARP_STATINC(CARP_STAT_IPACKETS);
MCLAIM(m, &carp_proto_mowner_rx);
@@ -542,11 +547,17 @@ carp_proto_input(struct mbuf *m, ...)
carp_proto_input_c(m, ch, AF_INET);
}
+void
+carp_proto_input(struct mbuf *m, ...)
+{
+
+ wqinput_input(carp_wqinput, m, 0, 0);
+}
+
#ifdef INET6
-int
-carp6_proto_input(struct mbuf **mp, int *offp, int proto)
+static void
+_carp6_proto_input(struct mbuf *m, int off, int proto)
{
- struct mbuf *m = *mp;
struct carp_softc *sc = NULL;
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
struct carp_header *ch;
@@ -558,7 +569,7 @@ carp6_proto_input(struct mbuf **mp, int
if (!carp_opts[CARPCTL_ALLOW]) {
m_freem(m);
- return (IPPROTO_DONE);
+ return;
}
rcvif = m_get_rcvif_NOMPSAFE(m);
@@ -569,7 +580,7 @@ carp6_proto_input(struct mbuf **mp, int
CARP_LOG(sc, ("packet received on non-carp interface: %s",
rcvif->if_xname));
m_freem(m);
- return (IPPROTO_DONE);
+ return;
}
/* verify that the IP TTL is 255 */
@@ -578,31 +589,40 @@ carp6_proto_input(struct mbuf **mp, int
CARP_LOG(sc, ("received ttl %d != %d on %s", ip6->ip6_hlim,
CARP_DFLTTL, rcvif->if_xname));
m_freem(m);
- return (IPPROTO_DONE);
+ return;
}
/* verify that we have a complete carp packet */
len = m->m_len;
- IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
+ IP6_EXTHDR_GET(ch, struct carp_header *, m, off, sizeof(*ch));
if (ch == NULL) {
CARP_STATINC(CARP_STAT_BADLEN);
CARP_LOG(sc, ("packet size %u too small", len));
- return (IPPROTO_DONE);
+ return;
}
/* verify the CARP checksum */
- m->m_data += *offp;
+ m->m_data += off;
if (carp_cksum(m, sizeof(*ch))) {
CARP_STATINC(CARP_STAT_BADSUM);
CARP_LOG(sc, ("checksum failed, on %s", rcvif->if_xname));
m_freem(m);
- return (IPPROTO_DONE);
+ return;
}
- m->m_data -= *offp;
+ m->m_data -= off;
carp_proto_input_c(m, ch, AF_INET6);
- return (IPPROTO_DONE);
+ return;
+}
+
+int
+carp6_proto_input(struct mbuf **mp, int *offp, int proto)
+{
+
+ wqinput_input(carp6_wqinput, *mp, *offp, proto);
+
+ return IPPROTO_DONE;
}
#endif /* INET6 */
@@ -2342,6 +2362,11 @@ carp_init(void)
MOWNER_ATTACH(&carp_proto6_mowner_rx);
MOWNER_ATTACH(&carp_proto6_mowner_tx);
#endif
+
+ carp_wqinput = wqinput_create("carp", _carp_proto_input);
+#ifdef INET6
+ carp6_wqinput = wqinput_create("carp6", _carp6_proto_input);
+#endif
}
static void
Index: src/sys/netinet/ip_icmp.c
diff -u src/sys/netinet/ip_icmp.c:1.155 src/sys/netinet/ip_icmp.c:1.156
--- src/sys/netinet/ip_icmp.c:1.155 Tue Jan 24 07:09:24 2017
+++ src/sys/netinet/ip_icmp.c Thu Feb 2 02:52:10 2017
@@ -1,4 +1,4 @@
-/* $NetBSD: ip_icmp.c,v 1.155 2017/01/24 07:09:24 ozaki-r Exp $ */
+/* $NetBSD: ip_icmp.c,v 1.156 2017/02/02 02:52:10 ozaki-r Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -94,7 +94,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ip_icmp.c,v 1.155 2017/01/24 07:09:24 ozaki-r Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ip_icmp.c,v 1.156 2017/02/02 02:52:10 ozaki-r Exp $");
#ifdef _KERNEL_OPT
#include "opt_ipsec.h"
@@ -125,6 +125,7 @@ __KERNEL_RCSID(0, "$NetBSD: ip_icmp.c,v
#include <netinet/in_proto.h>
#include <netinet/icmp_var.h>
#include <netinet/icmp_private.h>
+#include <netinet/wqinput.h>
#ifdef IPSEC
#include <netipsec/ipsec.h>
@@ -175,6 +176,10 @@ static void icmp_redirect_timeout(struct
static void sysctl_netinet_icmp_setup(struct sysctllog **);
+/* workqueue-based pr_input */
+static struct wqinput *icmp_wqinput;
+static void _icmp_input(struct mbuf *, int, int);
+
void
icmp_init(void)
{
@@ -191,6 +196,7 @@ icmp_init(void)
}
icmpstat_percpu = percpu_alloc(sizeof(uint64_t) * ICMP_NSTATS);
+ icmp_wqinput = wqinput_create("icmp", _icmp_input);
}
/*
@@ -384,10 +390,9 @@ struct sockaddr_in icmpmask = {
/*
* Process a received ICMP message.
*/
-void
-icmp_input(struct mbuf *m, ...)
+static void
+_icmp_input(struct mbuf *m, int hlen, int proto)
{
- int proto;
struct icmp *icp;
struct ip *ip = mtod(m, struct ip *);
int icmplen;
@@ -395,15 +400,8 @@ icmp_input(struct mbuf *m, ...)
struct in_ifaddr *ia;
void *(*ctlfunc)(int, const struct sockaddr *, void *);
int code;
- int hlen;
- va_list ap;
struct rtentry *rt;
- va_start(ap, m);
- hlen = va_arg(ap, int);
- proto = va_arg(ap, int);
- va_end(ap);
-
/*
* Locate icmp structure in mbuf, and check
* that not corrupted and of at least minimum length.
@@ -685,6 +683,20 @@ freeit:
return;
}
+void
+icmp_input(struct mbuf *m, ...)
+{
+ int hlen, proto;
+ va_list ap;
+
+ va_start(ap, m);
+ hlen = va_arg(ap, int);
+ proto = va_arg(ap, int);
+ va_end(ap);
+
+ wqinput_input(icmp_wqinput, m, hlen, proto);
+}
+
/*
* Reflect the ip packet back to the source
*/
Index: src/sys/netinet6/icmp6.c
diff -u src/sys/netinet6/icmp6.c:1.206 src/sys/netinet6/icmp6.c:1.207
--- src/sys/netinet6/icmp6.c:1.206 Mon Jan 16 15:44:47 2017
+++ src/sys/netinet6/icmp6.c Thu Feb 2 02:52:10 2017
@@ -1,4 +1,4 @@
-/* $NetBSD: icmp6.c,v 1.206 2017/01/16 15:44:47 christos Exp $ */
+/* $NetBSD: icmp6.c,v 1.207 2017/02/02 02:52:10 ozaki-r Exp $ */
/* $KAME: icmp6.c,v 1.217 2001/06/20 15:03:29 jinmei Exp $ */
/*
@@ -62,7 +62,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: icmp6.c,v 1.206 2017/01/16 15:44:47 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: icmp6.c,v 1.207 2017/02/02 02:52:10 ozaki-r Exp $");
#ifdef _KERNEL_OPT
#include "opt_inet.h"
@@ -90,6 +90,7 @@ __KERNEL_RCSID(0, "$NetBSD: icmp6.c,v 1.
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip6.h>
+#include <netinet/wqinput.h>
#include <netinet6/ip6_var.h>
#include <netinet6/ip6_private.h>
#include <netinet/icmp6.h>
@@ -169,6 +170,9 @@ static void icmp6_mtudisc_timeout(struct
static void icmp6_redirect_timeout(struct rtentry *, struct rttimer *);
static void sysctl_net_inet6_icmp6_setup(struct sysctllog **);
+/* workqueue-based pr_input */
+static struct wqinput *icmp6_wqinput;
+static void _icmp6_input(struct mbuf *m, int off, int proto);
void
icmp6_init(void)
@@ -180,6 +184,8 @@ icmp6_init(void)
icmp6_redirect_timeout_q = rt_timer_queue_create(icmp6_redirtimeout);
icmp6stat_percpu = percpu_alloc(sizeof(uint64_t) * ICMP6_NSTATS);
+
+ icmp6_wqinput = wqinput_create("icmp6", _icmp6_input);
}
static void
@@ -444,13 +450,12 @@ icmp6_error(struct mbuf *m, int type, in
/*
* Process a received ICMP6 message.
*/
-int
-icmp6_input(struct mbuf **mp, int *offp, int proto)
+static void
+_icmp6_input(struct mbuf *m, int off, int proto)
{
- struct mbuf *m = *mp, *n;
+ struct mbuf *n;
struct ip6_hdr *ip6, *nip6;
struct icmp6_hdr *icmp6, *nicmp6;
- int off = *offp;
int icmp6len = m->m_pkthdr.len - off;
int code, sum, noff;
struct ifnet *rcvif;
@@ -879,7 +884,7 @@ icmp6_input(struct mbuf **mp, int *offp,
if (icmp6_notify_error(m, off, icmp6len, code)) {
/* In this case, m should've been freed. */
m_put_rcvif_psref(rcvif, &psref);
- return (IPPROTO_DONE);
+ return;
}
break;
@@ -896,11 +901,20 @@ icmp6_input(struct mbuf **mp, int *offp,
/* deliver the packet to appropriate sockets */
icmp6_rip6_input(&m, off);
- return IPPROTO_DONE;
+ return;
freeit:
m_put_rcvif_psref(rcvif, &psref);
m_freem(m);
+ return;
+}
+
+int
+icmp6_input(struct mbuf **mp, int *offp, int proto)
+{
+
+ wqinput_input(icmp6_wqinput, *mp, *offp, proto);
+
return IPPROTO_DONE;
}
Index: src/sys/rump/librump/rumpnet/Makefile.rumpnet
diff -u src/sys/rump/librump/rumpnet/Makefile.rumpnet:1.20 src/sys/rump/librump/rumpnet/Makefile.rumpnet:1.21
--- src/sys/rump/librump/rumpnet/Makefile.rumpnet:1.20 Tue Jan 17 08:10:37 2017
+++ src/sys/rump/librump/rumpnet/Makefile.rumpnet Thu Feb 2 02:52:10 2017
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile.rumpnet,v 1.20 2017/01/17 08:10:37 ozaki-r Exp $
+# $NetBSD: Makefile.rumpnet,v 1.21 2017/02/02 02:52:10 ozaki-r Exp $
#
LIB= rumpnet
@@ -38,6 +38,9 @@ SRCS+=
# bpf stubs, required for all kernels
SRCS+= bpf_stub.c
+# workqueue-based pr_input (required by inet and inet6)
+SRCS+= wqinput.c
+
CPPFLAGS+= -I${RUMPTOP}/librump/rumpkern
.include "${RUMPTOP}/Makefile.rump"
Added files:
Index: src/sys/netinet/wqinput.c
diff -u /dev/null src/sys/netinet/wqinput.c:1.1
--- /dev/null Thu Feb 2 02:52:10 2017
+++ src/sys/netinet/wqinput.c Thu Feb 2 02:52:10 2017
@@ -0,0 +1,267 @@
+/* $NetBSD: wqinput.c,v 1.1 2017/02/02 02:52:10 ozaki-r Exp $ */
+
+/*-
+ * Copyright (c) 2017 Internet Initiative Japan Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/kmem.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socketvar.h>
+#include <sys/syslog.h>
+#include <sys/workqueue.h>
+#include <sys/atomic.h>
+#include <sys/queue.h>
+#include <sys/percpu.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <netinet/wqinput.h>
+
+#define WQINPUT_LIST_MAXLEN IFQ_MAXLEN
+
+struct wqinput_work {
+ struct mbuf *ww_mbuf;
+ int ww_off;
+ int ww_proto;
+ struct wqinput_work *ww_next;
+};
+
+struct wqinput_worklist {
+ /*
+ * XXX: TAILQ cannot be used because TAILQ_INIT memories the address
+ * of percpu data while percpu(9) may move percpu data during bootup.
+ */
+ struct wqinput_work *wwl_head;
+ struct wqinput_work *wwl_tail;
+ unsigned int wwl_len;
+ unsigned long wwl_dropped;
+ struct work wwl_work;
+ bool wwl_wq_is_active;
+};
+
+struct wqinput {
+ struct workqueue *wqi_wq;
+ struct pool wqi_work_pool;
+ struct percpu *wqi_worklists; /* struct wqinput_worklist */
+ void (*wqi_input)(struct mbuf *, int, int);
+};
+
+static void wqinput_work(struct work *, void *);
+static void wqinput_sysctl_setup(const char *, struct wqinput *);
+
+static void
+wqinput_drops(void *p, void *arg, struct cpu_info *ci __unused)
+{
+ struct wqinput_worklist *const wwl = p;
+ int *sum = arg;
+
+ *sum += wwl->wwl_dropped;
+}
+
+static int
+wqinput_sysctl_drops_handler(SYSCTLFN_ARGS)
+{
+ struct sysctlnode node;
+ struct wqinput *wqi;
+ int sum = 0;
+ int error;
+
+ node = *rnode;
+ wqi = node.sysctl_data;
+
+ percpu_foreach(wqi->wqi_worklists, wqinput_drops, &sum);
+
+ node.sysctl_data = &sum;
+ error = sysctl_lookup(SYSCTLFN_CALL(&node));
+ if (error != 0 || newp == NULL)
+ return error;
+
+ return 0;
+}
+
+static void
+wqinput_sysctl_setup(const char *name, struct wqinput *wqi)
+{
+ const struct sysctlnode *cnode, *rnode;
+ int error;
+
+ error = sysctl_createv(NULL, 0, NULL, &rnode,
+ CTLFLAG_PERMANENT, CTLTYPE_NODE, "wqinput",
+ SYSCTL_DESCR("workqueue-based pr_input controls"),
+ NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL);
+ if (error != 0)
+ goto bad;
+
+ error = sysctl_createv(NULL, 0, &rnode, &rnode,
+ CTLFLAG_PERMANENT, CTLTYPE_NODE, name,
+ SYSCTL_DESCR("Protocol controls for workqueue-based pr_input"),
+ NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
+ if (error != 0)
+ goto bad;
+
+ error = sysctl_createv(NULL, 0, &rnode, &rnode,
+ CTLFLAG_PERMANENT, CTLTYPE_NODE, "inputq",
+ SYSCTL_DESCR("wqinput input queue controls"),
+ NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
+ if (error != 0)
+ goto bad;
+
+ error = sysctl_createv(NULL, 0, &rnode, &cnode,
+ CTLFLAG_PERMANENT, CTLTYPE_INT, "drops",
+ SYSCTL_DESCR("Total packets dropped due to full input queue"),
+ wqinput_sysctl_drops_handler, 0, (void *)wqi, 0, CTL_CREATE, CTL_EOL);
+ if (error != 0)
+ goto bad;
+
+ return;
+bad:
+ log(LOG_ERR, "%s: could not create a sysctl node for %s\n",
+ __func__, name);
+ return;
+}
+
+struct wqinput *
+wqinput_create(const char *name, void (*func)(struct mbuf *, int, int))
+{
+ struct wqinput *wqi;
+ int error;
+ char namebuf[32];
+
+ snprintf(namebuf, sizeof(namebuf), "%s_wqinput", name);
+
+ wqi = kmem_alloc(sizeof(*wqi), KM_SLEEP);
+
+ error = workqueue_create(&wqi->wqi_wq, namebuf, wqinput_work, wqi,
+ PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE|WQ_PERCPU);
+ if (error != 0)
+ panic("%s: workqueue_create failed (%d)\n", __func__, error);
+ pool_init(&wqi->wqi_work_pool, sizeof(struct wqinput_work), 0, 0, 0,
+ namebuf, NULL, IPL_SOFTNET);
+ wqi->wqi_worklists = percpu_alloc(sizeof(struct wqinput_worklist));
+ wqi->wqi_input = func;
+
+ wqinput_sysctl_setup(name, wqi);
+
+ return wqi;
+}
+
+static struct wqinput_work *
+wqinput_work_get(struct wqinput_worklist *wwl)
+{
+ struct wqinput_work *work;
+
+ /* Must be called at IPL_SOFTNET */
+
+ work = wwl->wwl_head;
+ if (work != NULL) {
+ KASSERTMSG(wwl->wwl_len > 0, "wwl->wwl_len=%d", wwl->wwl_len);
+ wwl->wwl_len--;
+ wwl->wwl_head = work->ww_next;
+ work->ww_next = NULL;
+
+ if (wwl->wwl_head == NULL)
+ wwl->wwl_tail = NULL;
+ } else {
+ KASSERT(wwl->wwl_len == 0);
+ }
+
+ return work;
+}
+
+static void
+wqinput_work(struct work *wk, void *arg)
+{
+ struct wqinput *wqi = arg;
+ struct wqinput_work *work;
+ struct wqinput_worklist *wwl;
+ int s;
+
+ /* Users expect to run at IPL_SOFTNET */
+ s = splsoftnet();
+ /* This also prevents LWP migrations between CPUs */
+ wwl = percpu_getref(wqi->wqi_worklists);
+
+ /* We can allow enqueuing another work at this point */
+ wwl->wwl_wq_is_active = false;
+
+ while ((work = wqinput_work_get(wwl)) != NULL) {
+ mutex_enter(softnet_lock);
+ wqi->wqi_input(work->ww_mbuf, work->ww_off, work->ww_proto);
+ mutex_exit(softnet_lock);
+
+ pool_put(&wqi->wqi_work_pool, work);
+ }
+
+ percpu_putref(wqi->wqi_worklists);
+ splx(s);
+}
+
+static void
+wqinput_work_put(struct wqinput_worklist *wwl, struct wqinput_work *work)
+{
+
+ if (wwl->wwl_tail != NULL) {
+ wwl->wwl_tail->ww_next = work;
+ } else {
+ wwl->wwl_head = work;
+ }
+ wwl->wwl_tail = work;
+ wwl->wwl_len++;
+}
+
+void
+wqinput_input(struct wqinput *wqi, struct mbuf *m, int off, int proto)
+{
+ struct wqinput_work *work;
+ struct wqinput_worklist *wwl;
+
+ wwl = percpu_getref(wqi->wqi_worklists);
+
+ /* Prevent too much work and mbuf from being queued */
+ if (wwl->wwl_len >= WQINPUT_LIST_MAXLEN) {
+ wwl->wwl_dropped++;
+ m_freem(m);
+ goto out;
+ }
+
+ work = pool_get(&wqi->wqi_work_pool, PR_NOWAIT);
+ work->ww_mbuf = m;
+ work->ww_off = off;
+ work->ww_proto = proto;
+ work->ww_next = NULL;
+
+ wqinput_work_put(wwl, work);
+
+ /* Avoid enqueuing another work when one is already enqueued */
+ if (wwl->wwl_wq_is_active)
+ goto out;
+ wwl->wwl_wq_is_active = true;
+
+ workqueue_enqueue(wqi->wqi_wq, &wwl->wwl_work, NULL);
+out:
+ percpu_putref(wqi->wqi_worklists);
+}
Index: src/sys/netinet/wqinput.h
diff -u /dev/null src/sys/netinet/wqinput.h:1.1
--- /dev/null Thu Feb 2 02:52:10 2017
+++ src/sys/netinet/wqinput.h Thu Feb 2 02:52:10 2017
@@ -0,0 +1,42 @@
+/* $NetBSD: wqinput.h,v 1.1 2017/02/02 02:52:10 ozaki-r Exp $ */
+
+/*-
+ * Copyright (c) 2017 Internet Initiative Japan Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _NETINET_WQINPUT_H_
+#define _NETINET_WQINPUT_H_
+
+#if !defined(_KERNEL)
+#error "not supposed to be exposed to userland."
+#endif
+
+#include <sys/mbuf.h>
+
+struct wqinput;
+struct wqinput *wqinput_create(const char *, void(*)(struct mbuf *, int, int));
+void wqinput_input(struct wqinput *, struct mbuf *, int, int);
+
+#endif /* _NETINET_WQINPUT_H_ */