Module Name:    src
Committed By:   ozaki-r
Date:           Tue Jan 24 09:05:28 UTC 2017

Modified Files:
        src/sys/dev/ic: hd64570.c midway.c
        src/sys/dev/pci: if_lmc.c if_lmc.h
        src/sys/net: bpf.c bpf.h bpfdesc.h
        src/sys/netisdn: i4b_ipr.c
Added Files:
        src/doc: TODO.smpnet

Log Message:
Defer bpf_mtap in Rx interrupt context to softint

bpf_mtap of some drivers is still called in hardware interrupt context.
We want to run them in softint as well as bpf_mtap of most drivers
(see if_percpuq_softint and if_input).

To this end, bpf_mtap_softint mechanism is implemented; it defers
bpf_mtap processing to a dedicated softint for a target driver.
By using the machanism, we can move bpf_mtap processing to softint
without changing target drivers much while it adds some overhead
on CPU and memory. Once target drivers are changed to softint-based,
we should return to normal bpf_mtap.

Proposed on tech-kern and tech-net


To generate a diff of this commit:
cvs rdiff -u -r0 -r1.1 src/doc/TODO.smpnet
cvs rdiff -u -r1.51 -r1.52 src/sys/dev/ic/hd64570.c
cvs rdiff -u -r1.98 -r1.99 src/sys/dev/ic/midway.c
cvs rdiff -u -r1.62 -r1.63 src/sys/dev/pci/if_lmc.c
cvs rdiff -u -r1.23 -r1.24 src/sys/dev/pci/if_lmc.h
cvs rdiff -u -r1.204 -r1.205 src/sys/net/bpf.c
cvs rdiff -u -r1.67 -r1.68 src/sys/net/bpf.h
cvs rdiff -u -r1.39 -r1.40 src/sys/net/bpfdesc.h
cvs rdiff -u -r1.40 -r1.41 src/sys/netisdn/i4b_ipr.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/dev/ic/hd64570.c
diff -u src/sys/dev/ic/hd64570.c:1.51 src/sys/dev/ic/hd64570.c:1.52
--- src/sys/dev/ic/hd64570.c:1.51	Thu Dec 15 09:35:24 2016
+++ src/sys/dev/ic/hd64570.c	Tue Jan 24 09:05:28 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: hd64570.c,v 1.51 2016/12/15 09:35:24 ozaki-r Exp $	*/
+/*	$NetBSD: hd64570.c,v 1.52 2017/01/24 09:05:28 ozaki-r Exp $	*/
 
 /*
  * Copyright (c) 1999 Christian E. Hopps
@@ -65,7 +65,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: hd64570.c,v 1.51 2016/12/15 09:35:24 ozaki-r Exp $");
+__KERNEL_RCSID(0, "$NetBSD: hd64570.c,v 1.52 2017/01/24 09:05:28 ozaki-r Exp $");
 
 #include "opt_inet.h"
 
@@ -457,6 +457,7 @@ sca_port_attach(struct sca_softc *sc, u_
 	if_attach(ifp);
 	if_alloc_sadl(ifp);
 	bpf_attach(ifp, DLT_HDLC, HDLC_HDRLEN);
+	bpf_mtap_softint_init(ifp);
 
 	if (sc->sc_parent == NULL)
 		printf("%s: port %d\n", ifp->if_xname, port);
@@ -1574,7 +1575,7 @@ sca_frame_process(sca_port_t *scp)
 		return;
 	}
 
-	bpf_mtap(&scp->sp_if, m); /* XXX not in softint */
+	bpf_mtap_softint(&scp->sp_if, m);
 
 	scp->sp_if.if_ipackets++;
 

Index: src/sys/dev/ic/midway.c
diff -u src/sys/dev/ic/midway.c:1.98 src/sys/dev/ic/midway.c:1.99
--- src/sys/dev/ic/midway.c:1.98	Thu Dec 15 09:35:24 2016
+++ src/sys/dev/ic/midway.c	Tue Jan 24 09:05:28 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: midway.c,v 1.98 2016/12/15 09:35:24 ozaki-r Exp $	*/
+/*	$NetBSD: midway.c,v 1.99 2017/01/24 09:05:28 ozaki-r Exp $	*/
 /*	(sync'd to midway.c 1.68)	*/
 
 /*
@@ -61,7 +61,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: midway.c,v 1.98 2016/12/15 09:35:24 ozaki-r Exp $");
+__KERNEL_RCSID(0, "$NetBSD: midway.c,v 1.99 2017/01/24 09:05:28 ozaki-r Exp $");
 
 #include "opt_natm.h"
 
@@ -2766,7 +2766,7 @@ EN_INTR_TYPE en_intr(void *arg)
 	  ifp->if_ipackets++;
 #endif
 
-	  bpf_mtap(ifp, m); /* XXX not in softint */
+	  bpf_mtap_softint(ifp, m);
 
 	  atm_input(ifp, &ah, m, sc->rxslot[slot].rxhand);
 	}
@@ -3623,6 +3623,7 @@ en_pvcattach(struct ifnet *ifp)
 	LIST_INSERT_HEAD(&sc->sif_list, (struct pvcsif *)pvc_ifp, sif_links);
 	if_attach(pvc_ifp);
 	atm_ifattach(pvc_ifp);
+	bpf_mtap_softint_init(pvc_ifp);
 
 #ifdef ATM_PVCEXT
 	rrp_add(sc, pvc_ifp);

Index: src/sys/dev/pci/if_lmc.c
diff -u src/sys/dev/pci/if_lmc.c:1.62 src/sys/dev/pci/if_lmc.c:1.63
--- src/sys/dev/pci/if_lmc.c:1.62	Thu Dec 15 09:35:24 2016
+++ src/sys/dev/pci/if_lmc.c	Tue Jan 24 09:05:28 2017
@@ -1,4 +1,4 @@
-/* $NetBSD: if_lmc.c,v 1.62 2016/12/15 09:35:24 ozaki-r Exp $ */
+/* $NetBSD: if_lmc.c,v 1.63 2017/01/24 09:05:28 ozaki-r Exp $ */
 
 /*-
  * Copyright (c) 2002-2006 David Boggs. <bo...@boggs.palo-alto.ca.us>
@@ -74,7 +74,7 @@
  */
 
 # include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: if_lmc.c,v 1.62 2016/12/15 09:35:24 ozaki-r Exp $");
+__KERNEL_RCSID(0, "$NetBSD: if_lmc.c,v 1.63 2017/01/24 09:05:28 ozaki-r Exp $");
 # include <sys/param.h>	/* OS version */
 # include "opt_inet.h"	/* INET6, INET */
 # include "opt_altq_enabled.h" /* ALTQ */
@@ -4294,7 +4294,7 @@ rxintr_cleanup(softc_t *sc)
     sc->status.cntrs.ipackets++;
 
     /* Berkeley Packet Filter */
-    LMC_BPF_MTAP(sc, first_mbuf); /* XXX not in softint */
+    LMC_BPF_MTAP(sc, first_mbuf);
 
     /* Give this good packet to the network stacks. */
     sc->quota--;
@@ -4446,7 +4446,7 @@ txintr_cleanup(softc_t *sc)
         sc->status.cntrs.opackets++;
 
         /* Berkeley Packet Filter */
-        LMC_BPF_MTAP(sc, m); /* XXX not in softint */
+        LMC_BPF_MTAP(sc, m);
 	}
 
       m_freem(m);

Index: src/sys/dev/pci/if_lmc.h
diff -u src/sys/dev/pci/if_lmc.h:1.23 src/sys/dev/pci/if_lmc.h:1.24
--- src/sys/dev/pci/if_lmc.h:1.23	Thu Apr 28 00:16:56 2016
+++ src/sys/dev/pci/if_lmc.h	Tue Jan 24 09:05:28 2017
@@ -1,5 +1,5 @@
 /*-
- * $NetBSD: if_lmc.h,v 1.23 2016/04/28 00:16:56 ozaki-r Exp $
+ * $NetBSD: if_lmc.h,v 1.24 2017/01/24 09:05:28 ozaki-r Exp $
  *
  * Copyright (c) 2002-2006 David Boggs. (bo...@boggs.palo-alto.ca.us)
  * All rights reserved.
@@ -984,8 +984,12 @@ typedef int intr_return_t;
 # define SLEEP(usecs)		tsleep(sc, PZERO, DEVICE_NAME, 1+(usecs/tick))
 # define DMA_SYNC(map, size, flags) bus_dmamap_sync(ring->tag, map, 0, size, flags)
 # define DMA_LOAD(map, addr, size)  bus_dmamap_load(ring->tag, map, addr, size, 0, BUS_DMA_NOWAIT)
-#  define LMC_BPF_MTAP(sc, mbuf)	bpf_mtap((sc)->ifp, mbuf)
-#  define LMC_BPF_ATTACH(sc, dlt, len)	bpf_attach((sc)->ifp, dlt, len)
+#  define LMC_BPF_MTAP(sc, mbuf)	bpf_mtap_softint((sc)->ifp, mbuf)
+#  define LMC_BPF_ATTACH(sc, dlt, len)			\
+	do {						\
+		bpf_attach((sc)->ifp, dlt, len);	\
+		bpf_mtap_softint_init((sc)->ifp);	\
+	} while (0)
 #  define LMC_BPF_DETACH(sc)		bpf_detach((sc)->ifp)
 
 static int driver_announced = 0;	/* print driver info once only */

Index: src/sys/net/bpf.c
diff -u src/sys/net/bpf.c:1.204 src/sys/net/bpf.c:1.205
--- src/sys/net/bpf.c:1.204	Mon Jan 23 10:17:36 2017
+++ src/sys/net/bpf.c	Tue Jan 24 09:05:28 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: bpf.c,v 1.204 2017/01/23 10:17:36 ozaki-r Exp $	*/
+/*	$NetBSD: bpf.c,v 1.205 2017/01/24 09:05:28 ozaki-r Exp $	*/
 
 /*
  * Copyright (c) 1990, 1991, 1993
@@ -39,12 +39,13 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.204 2017/01/23 10:17:36 ozaki-r Exp $");
+__KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.205 2017/01/24 09:05:28 ozaki-r Exp $");
 
 #if defined(_KERNEL_OPT)
 #include "opt_bpf.h"
 #include "sl.h"
 #include "strip.h"
+#include "opt_net_mpsafe.h"
 #endif
 
 #include <sys/param.h>
@@ -60,6 +61,7 @@ __KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.20
 #include <sys/stat.h>
 #include <sys/module.h>
 #include <sys/atomic.h>
+#include <sys/cpu.h>
 
 #include <sys/file.h>
 #include <sys/filedesc.h>
@@ -73,6 +75,7 @@ __KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.20
 #include <sys/poll.h>
 #include <sys/sysctl.h>
 #include <sys/kauth.h>
+#include <sys/syslog.h>
 
 #include <net/if.h>
 #include <net/slip.h>
@@ -1594,6 +1597,92 @@ _bpf_mtap_sl_out(struct bpf_if *bp, u_ch
 	m_freem(m);
 }
 
+static struct mbuf *
+bpf_mbuf_enqueue(struct bpf_if *bp, struct mbuf *m)
+{
+	struct mbuf *dup;
+
+	dup = m_dup(m, 0, M_COPYALL, M_NOWAIT);
+	if (dup == NULL)
+		return NULL;
+
+	if (bp->bif_mbuf_tail != NULL) {
+		bp->bif_mbuf_tail->m_nextpkt = dup;
+	} else {
+		bp->bif_mbuf_head = dup;
+	}
+	bp->bif_mbuf_tail = dup;
+#ifdef BPF_MTAP_SOFTINT_DEBUG
+	log(LOG_DEBUG, "%s: enqueued mbuf=%p to %s\n",
+	    __func__, dup, bp->bif_ifp->if_xname);
+#endif
+
+	return dup;
+}
+
+static struct mbuf *
+bpf_mbuf_dequeue(struct bpf_if *bp)
+{
+	struct mbuf *m;
+	int s;
+
+	s = splnet();
+	m = bp->bif_mbuf_head;
+	if (m != NULL) {
+		bp->bif_mbuf_head = m->m_nextpkt;
+		m->m_nextpkt = NULL;
+
+		if (bp->bif_mbuf_head == NULL)
+			bp->bif_mbuf_tail = NULL;
+#ifdef BPF_MTAP_SOFTINT_DEBUG
+		log(LOG_DEBUG, "%s: dequeued mbuf=%p from %s\n",
+		    __func__, m, bp->bif_ifp->if_xname);
+#endif
+	}
+	splx(s);
+
+	return m;
+}
+
+static void
+bpf_mtap_si(void *arg)
+{
+	struct bpf_if *bp = arg;
+	struct mbuf *m;
+
+	while ((m = bpf_mbuf_dequeue(bp)) != NULL) {
+#ifdef BPF_MTAP_SOFTINT_DEBUG
+		log(LOG_DEBUG, "%s: tapping mbuf=%p on %s\n",
+		    __func__, m, bp->bif_ifp->if_xname);
+#endif
+#ifndef NET_MPSAFE
+		KERNEL_LOCK(1, NULL);
+#endif
+		bpf_ops->bpf_mtap(bp, m);
+#ifndef NET_MPSAFE
+		KERNEL_UNLOCK_ONE(NULL);
+#endif
+		m_freem(m);
+	}
+}
+
+void
+bpf_mtap_softint(struct ifnet *ifp, struct mbuf *m)
+{
+	struct bpf_if *bp = ifp->if_bpf;
+	struct mbuf *dup;
+
+	KASSERT(cpu_intr_p());
+
+	if (bp == NULL || bp->bif_dlist == NULL)
+		return;
+	KASSERT(bp->bif_si != NULL);
+
+	dup = bpf_mbuf_enqueue(bp, m);
+	if (dup != NULL)
+		softint_schedule(bp->bif_si);
+}
+
 static int
 bpf_hdrlen(struct bpf_d *d)
 {
@@ -1790,6 +1879,7 @@ _bpfattach(struct ifnet *ifp, u_int dlt,
 	bp->bif_driverp = driverp;
 	bp->bif_ifp = ifp;
 	bp->bif_dlt = dlt;
+	bp->bif_si = NULL;
 
 	bp->bif_next = bpf_iflist;
 	bpf_iflist = bp;
@@ -1803,6 +1893,29 @@ _bpfattach(struct ifnet *ifp, u_int dlt,
 #endif
 }
 
+void
+bpf_mtap_softint_init(struct ifnet *ifp)
+{
+	struct bpf_if *bp;
+
+	mutex_enter(&bpf_mtx);
+	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
+		if (bp->bif_ifp != ifp)
+			continue;
+
+		bp->bif_mbuf_head = NULL;
+		bp->bif_mbuf_tail = NULL;
+		bp->bif_si = softint_establish(SOFTINT_NET, bpf_mtap_si, bp);
+		if (bp->bif_si == NULL)
+			panic("%s: softint_establish() failed", __func__);
+		break;
+	}
+	mutex_exit(&bpf_mtx);
+
+	if (bp == NULL)
+		panic("%s: no bpf_if found for %s", __func__, ifp->if_xname);
+}
+
 /*
  * Remove an interface from bpf.
  */
@@ -1833,6 +1946,16 @@ _bpfdetach(struct ifnet *ifp)
 	     bp != NULL; pbp = &bp->bif_next, bp = bp->bif_next) {
 		if (bp->bif_ifp == ifp) {
 			*pbp = bp->bif_next;
+			if (bp->bif_si != NULL) {
+				s = splnet();
+				while (bp->bif_mbuf_head != NULL) {
+					struct mbuf *m = bp->bif_mbuf_head;
+					bp->bif_mbuf_head = m->m_nextpkt;
+					m_freem(m);
+				}
+				splx(s);
+				softint_disestablish(bp->bif_si);
+			}
 			free(bp, M_DEVBUF);
 			goto again;
 		}

Index: src/sys/net/bpf.h
diff -u src/sys/net/bpf.h:1.67 src/sys/net/bpf.h:1.68
--- src/sys/net/bpf.h:1.67	Sat Sep  5 20:01:21 2015
+++ src/sys/net/bpf.h	Tue Jan 24 09:05:28 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: bpf.h,v 1.67 2015/09/05 20:01:21 dholland Exp $	*/
+/*	$NetBSD: bpf.h,v 1.68 2017/01/24 09:05:28 ozaki-r Exp $	*/
 
 /*
  * Copyright (c) 1990, 1991, 1993
@@ -517,6 +517,9 @@ int	bpf_validate_ext(const bpf_ctx_t *, 
 bpfjit_func_t bpf_jit_generate(bpf_ctx_t *, void *, size_t);
 void	bpf_jit_freecode(bpfjit_func_t);
 
+void	bpf_mtap_softint_init(struct ifnet *);
+void	bpf_mtap_softint(struct ifnet *, struct mbuf *);
+
 #endif
 
 int	bpf_validate(const struct bpf_insn *, int);

Index: src/sys/net/bpfdesc.h
diff -u src/sys/net/bpfdesc.h:1.39 src/sys/net/bpfdesc.h:1.40
--- src/sys/net/bpfdesc.h:1.39	Mon Jan 23 10:17:36 2017
+++ src/sys/net/bpfdesc.h	Tue Jan 24 09:05:28 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: bpfdesc.h,v 1.39 2017/01/23 10:17:36 ozaki-r Exp $	*/
+/*	$NetBSD: bpfdesc.h,v 1.40 2017/01/24 09:05:28 ozaki-r Exp $	*/
 
 /*
  * Copyright (c) 1990, 1991, 1993
@@ -139,6 +139,9 @@ struct bpf_if {
 	u_int bif_dlt;			/* link layer type */
 	u_int bif_hdrlen;		/* length of header (with padding) */
 	struct ifnet *bif_ifp;		/* corresponding interface */
+	void *bif_si;
+	struct mbuf *bif_mbuf_head;
+	struct mbuf *bif_mbuf_tail;
 };
 
 #endif /* !_NET_BPFDESC_H_ */

Index: src/sys/netisdn/i4b_ipr.c
diff -u src/sys/netisdn/i4b_ipr.c:1.40 src/sys/netisdn/i4b_ipr.c:1.41
--- src/sys/netisdn/i4b_ipr.c:1.40	Thu Dec 15 09:35:24 2016
+++ src/sys/netisdn/i4b_ipr.c	Tue Jan 24 09:05:28 2017
@@ -27,7 +27,7 @@
  *	i4b_ipr.c - isdn4bsd IP over raw HDLC ISDN network driver
  *	---------------------------------------------------------
  *
- *	$Id: i4b_ipr.c,v 1.40 2016/12/15 09:35:24 ozaki-r Exp $
+ *	$Id: i4b_ipr.c,v 1.41 2017/01/24 09:05:28 ozaki-r Exp $
  *
  * $FreeBSD$
  *
@@ -59,7 +59,7 @@
  *---------------------------------------------------------------------------*/
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: i4b_ipr.c,v 1.40 2016/12/15 09:35:24 ozaki-r Exp $");
+__KERNEL_RCSID(0, "$NetBSD: i4b_ipr.c,v 1.41 2017/01/24 09:05:28 ozaki-r Exp $");
 
 #include "irip.h"
 #include "opt_irip.h"
@@ -406,6 +406,7 @@ iripattach(void)
 		bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int));
 #else
 		bpf_attach(&sc->sc_if, DLT_NULL, sizeof(u_int));
+		bpf_mtap_softint_init(&sc->sc_if);
 #endif
 #endif
 	}
@@ -1070,7 +1071,7 @@ error:
 		mm.m_len = 4;
 		mm.m_data = (char *)&af;
 
-		bpf_mtap(&sc->sc_if, &mm); /* XXX not in softint */
+		bpf_mtap_softint(&sc->sc_if, &mm);
 	}
 #endif /* NBPFILTER > 0  || NBPF > 0 */
 

Added files:

Index: src/doc/TODO.smpnet
diff -u /dev/null src/doc/TODO.smpnet:1.1
--- /dev/null	Tue Jan 24 09:05:28 2017
+++ src/doc/TODO.smpnet	Tue Jan 24 09:05:27 2017
@@ -0,0 +1,28 @@
+$NetBSD: TODO.smpnet,v 1.1 2017/01/24 09:05:27 ozaki-r Exp $
+
+Non MP-safe components
+======================
+
+ - bpf
+ - To be listed more...
+
+bpf
+===
+
+MP-ification of bpf requires all of bpf_mtap* are called in normal LWP context
+or softint context, i.e., not in hardware interrupt context.  For Tx, all
+bpf_mtap satisfy the requrement.  For Rx, most of bpf_mtap are called in softint.
+Unfortunately some bpf_mtap on Rx are still called in hardware interrupt context.
+
+This is the list of the functions that have such bpf_mtap:
+
+ - sca_frame_process() @ sys/dev/ic/hd64570.c
+ - en_intr() @ sys/dev/ic/midway.c
+ - rxintr_cleanup() and txintr_cleanup() @ sys/dev/pci/if_lmc.c
+ - ipr_rx_data_rdy() @ sys/netisdn/i4b_ipr.c
+
+Ideally we should make the functions run in softint somehow, but we don't have
+actual devices, no time (or interest/love) to work on the task, so instead we
+provide a deferred bpf_mtap mechanism that forcibly runs bpf_mtap in softint
+context.  It's a workaround and once the functions run in softint, we should use
+the original bpf_mtap again.

Reply via email to