Module Name: src Committed By: ozaki-r Date: Tue Jan 24 09:05:28 UTC 2017
Modified Files: src/sys/dev/ic: hd64570.c midway.c src/sys/dev/pci: if_lmc.c if_lmc.h src/sys/net: bpf.c bpf.h bpfdesc.h src/sys/netisdn: i4b_ipr.c Added Files: src/doc: TODO.smpnet Log Message: Defer bpf_mtap in Rx interrupt context to softint bpf_mtap of some drivers is still called in hardware interrupt context. We want to run them in softint as well as bpf_mtap of most drivers (see if_percpuq_softint and if_input). To this end, bpf_mtap_softint mechanism is implemented; it defers bpf_mtap processing to a dedicated softint for a target driver. By using the machanism, we can move bpf_mtap processing to softint without changing target drivers much while it adds some overhead on CPU and memory. Once target drivers are changed to softint-based, we should return to normal bpf_mtap. Proposed on tech-kern and tech-net To generate a diff of this commit: cvs rdiff -u -r0 -r1.1 src/doc/TODO.smpnet cvs rdiff -u -r1.51 -r1.52 src/sys/dev/ic/hd64570.c cvs rdiff -u -r1.98 -r1.99 src/sys/dev/ic/midway.c cvs rdiff -u -r1.62 -r1.63 src/sys/dev/pci/if_lmc.c cvs rdiff -u -r1.23 -r1.24 src/sys/dev/pci/if_lmc.h cvs rdiff -u -r1.204 -r1.205 src/sys/net/bpf.c cvs rdiff -u -r1.67 -r1.68 src/sys/net/bpf.h cvs rdiff -u -r1.39 -r1.40 src/sys/net/bpfdesc.h cvs rdiff -u -r1.40 -r1.41 src/sys/netisdn/i4b_ipr.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/dev/ic/hd64570.c diff -u src/sys/dev/ic/hd64570.c:1.51 src/sys/dev/ic/hd64570.c:1.52 --- src/sys/dev/ic/hd64570.c:1.51 Thu Dec 15 09:35:24 2016 +++ src/sys/dev/ic/hd64570.c Tue Jan 24 09:05:28 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: hd64570.c,v 1.51 2016/12/15 09:35:24 ozaki-r Exp $ */ +/* $NetBSD: hd64570.c,v 1.52 2017/01/24 09:05:28 ozaki-r Exp $ */ /* * Copyright (c) 1999 Christian E. Hopps @@ -65,7 +65,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: hd64570.c,v 1.51 2016/12/15 09:35:24 ozaki-r Exp $"); +__KERNEL_RCSID(0, "$NetBSD: hd64570.c,v 1.52 2017/01/24 09:05:28 ozaki-r Exp $"); #include "opt_inet.h" @@ -457,6 +457,7 @@ sca_port_attach(struct sca_softc *sc, u_ if_attach(ifp); if_alloc_sadl(ifp); bpf_attach(ifp, DLT_HDLC, HDLC_HDRLEN); + bpf_mtap_softint_init(ifp); if (sc->sc_parent == NULL) printf("%s: port %d\n", ifp->if_xname, port); @@ -1574,7 +1575,7 @@ sca_frame_process(sca_port_t *scp) return; } - bpf_mtap(&scp->sp_if, m); /* XXX not in softint */ + bpf_mtap_softint(&scp->sp_if, m); scp->sp_if.if_ipackets++; Index: src/sys/dev/ic/midway.c diff -u src/sys/dev/ic/midway.c:1.98 src/sys/dev/ic/midway.c:1.99 --- src/sys/dev/ic/midway.c:1.98 Thu Dec 15 09:35:24 2016 +++ src/sys/dev/ic/midway.c Tue Jan 24 09:05:28 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: midway.c,v 1.98 2016/12/15 09:35:24 ozaki-r Exp $ */ +/* $NetBSD: midway.c,v 1.99 2017/01/24 09:05:28 ozaki-r Exp $ */ /* (sync'd to midway.c 1.68) */ /* @@ -61,7 +61,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: midway.c,v 1.98 2016/12/15 09:35:24 ozaki-r Exp $"); +__KERNEL_RCSID(0, "$NetBSD: midway.c,v 1.99 2017/01/24 09:05:28 ozaki-r Exp $"); #include "opt_natm.h" @@ -2766,7 +2766,7 @@ EN_INTR_TYPE en_intr(void *arg) ifp->if_ipackets++; #endif - bpf_mtap(ifp, m); /* XXX not in softint */ + bpf_mtap_softint(ifp, m); atm_input(ifp, &ah, m, sc->rxslot[slot].rxhand); } @@ -3623,6 +3623,7 @@ en_pvcattach(struct ifnet *ifp) LIST_INSERT_HEAD(&sc->sif_list, (struct pvcsif *)pvc_ifp, sif_links); if_attach(pvc_ifp); atm_ifattach(pvc_ifp); + bpf_mtap_softint_init(pvc_ifp); #ifdef ATM_PVCEXT rrp_add(sc, pvc_ifp); Index: src/sys/dev/pci/if_lmc.c diff -u src/sys/dev/pci/if_lmc.c:1.62 src/sys/dev/pci/if_lmc.c:1.63 --- src/sys/dev/pci/if_lmc.c:1.62 Thu Dec 15 09:35:24 2016 +++ src/sys/dev/pci/if_lmc.c Tue Jan 24 09:05:28 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: if_lmc.c,v 1.62 2016/12/15 09:35:24 ozaki-r Exp $ */ +/* $NetBSD: if_lmc.c,v 1.63 2017/01/24 09:05:28 ozaki-r Exp $ */ /*- * Copyright (c) 2002-2006 David Boggs. <bo...@boggs.palo-alto.ca.us> @@ -74,7 +74,7 @@ */ # include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: if_lmc.c,v 1.62 2016/12/15 09:35:24 ozaki-r Exp $"); +__KERNEL_RCSID(0, "$NetBSD: if_lmc.c,v 1.63 2017/01/24 09:05:28 ozaki-r Exp $"); # include <sys/param.h> /* OS version */ # include "opt_inet.h" /* INET6, INET */ # include "opt_altq_enabled.h" /* ALTQ */ @@ -4294,7 +4294,7 @@ rxintr_cleanup(softc_t *sc) sc->status.cntrs.ipackets++; /* Berkeley Packet Filter */ - LMC_BPF_MTAP(sc, first_mbuf); /* XXX not in softint */ + LMC_BPF_MTAP(sc, first_mbuf); /* Give this good packet to the network stacks. */ sc->quota--; @@ -4446,7 +4446,7 @@ txintr_cleanup(softc_t *sc) sc->status.cntrs.opackets++; /* Berkeley Packet Filter */ - LMC_BPF_MTAP(sc, m); /* XXX not in softint */ + LMC_BPF_MTAP(sc, m); } m_freem(m); Index: src/sys/dev/pci/if_lmc.h diff -u src/sys/dev/pci/if_lmc.h:1.23 src/sys/dev/pci/if_lmc.h:1.24 --- src/sys/dev/pci/if_lmc.h:1.23 Thu Apr 28 00:16:56 2016 +++ src/sys/dev/pci/if_lmc.h Tue Jan 24 09:05:28 2017 @@ -1,5 +1,5 @@ /*- - * $NetBSD: if_lmc.h,v 1.23 2016/04/28 00:16:56 ozaki-r Exp $ + * $NetBSD: if_lmc.h,v 1.24 2017/01/24 09:05:28 ozaki-r Exp $ * * Copyright (c) 2002-2006 David Boggs. (bo...@boggs.palo-alto.ca.us) * All rights reserved. @@ -984,8 +984,12 @@ typedef int intr_return_t; # define SLEEP(usecs) tsleep(sc, PZERO, DEVICE_NAME, 1+(usecs/tick)) # define DMA_SYNC(map, size, flags) bus_dmamap_sync(ring->tag, map, 0, size, flags) # define DMA_LOAD(map, addr, size) bus_dmamap_load(ring->tag, map, addr, size, 0, BUS_DMA_NOWAIT) -# define LMC_BPF_MTAP(sc, mbuf) bpf_mtap((sc)->ifp, mbuf) -# define LMC_BPF_ATTACH(sc, dlt, len) bpf_attach((sc)->ifp, dlt, len) +# define LMC_BPF_MTAP(sc, mbuf) bpf_mtap_softint((sc)->ifp, mbuf) +# define LMC_BPF_ATTACH(sc, dlt, len) \ + do { \ + bpf_attach((sc)->ifp, dlt, len); \ + bpf_mtap_softint_init((sc)->ifp); \ + } while (0) # define LMC_BPF_DETACH(sc) bpf_detach((sc)->ifp) static int driver_announced = 0; /* print driver info once only */ Index: src/sys/net/bpf.c diff -u src/sys/net/bpf.c:1.204 src/sys/net/bpf.c:1.205 --- src/sys/net/bpf.c:1.204 Mon Jan 23 10:17:36 2017 +++ src/sys/net/bpf.c Tue Jan 24 09:05:28 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: bpf.c,v 1.204 2017/01/23 10:17:36 ozaki-r Exp $ */ +/* $NetBSD: bpf.c,v 1.205 2017/01/24 09:05:28 ozaki-r Exp $ */ /* * Copyright (c) 1990, 1991, 1993 @@ -39,12 +39,13 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.204 2017/01/23 10:17:36 ozaki-r Exp $"); +__KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.205 2017/01/24 09:05:28 ozaki-r Exp $"); #if defined(_KERNEL_OPT) #include "opt_bpf.h" #include "sl.h" #include "strip.h" +#include "opt_net_mpsafe.h" #endif #include <sys/param.h> @@ -60,6 +61,7 @@ __KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.20 #include <sys/stat.h> #include <sys/module.h> #include <sys/atomic.h> +#include <sys/cpu.h> #include <sys/file.h> #include <sys/filedesc.h> @@ -73,6 +75,7 @@ __KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.20 #include <sys/poll.h> #include <sys/sysctl.h> #include <sys/kauth.h> +#include <sys/syslog.h> #include <net/if.h> #include <net/slip.h> @@ -1594,6 +1597,92 @@ _bpf_mtap_sl_out(struct bpf_if *bp, u_ch m_freem(m); } +static struct mbuf * +bpf_mbuf_enqueue(struct bpf_if *bp, struct mbuf *m) +{ + struct mbuf *dup; + + dup = m_dup(m, 0, M_COPYALL, M_NOWAIT); + if (dup == NULL) + return NULL; + + if (bp->bif_mbuf_tail != NULL) { + bp->bif_mbuf_tail->m_nextpkt = dup; + } else { + bp->bif_mbuf_head = dup; + } + bp->bif_mbuf_tail = dup; +#ifdef BPF_MTAP_SOFTINT_DEBUG + log(LOG_DEBUG, "%s: enqueued mbuf=%p to %s\n", + __func__, dup, bp->bif_ifp->if_xname); +#endif + + return dup; +} + +static struct mbuf * +bpf_mbuf_dequeue(struct bpf_if *bp) +{ + struct mbuf *m; + int s; + + s = splnet(); + m = bp->bif_mbuf_head; + if (m != NULL) { + bp->bif_mbuf_head = m->m_nextpkt; + m->m_nextpkt = NULL; + + if (bp->bif_mbuf_head == NULL) + bp->bif_mbuf_tail = NULL; +#ifdef BPF_MTAP_SOFTINT_DEBUG + log(LOG_DEBUG, "%s: dequeued mbuf=%p from %s\n", + __func__, m, bp->bif_ifp->if_xname); +#endif + } + splx(s); + + return m; +} + +static void +bpf_mtap_si(void *arg) +{ + struct bpf_if *bp = arg; + struct mbuf *m; + + while ((m = bpf_mbuf_dequeue(bp)) != NULL) { +#ifdef BPF_MTAP_SOFTINT_DEBUG + log(LOG_DEBUG, "%s: tapping mbuf=%p on %s\n", + __func__, m, bp->bif_ifp->if_xname); +#endif +#ifndef NET_MPSAFE + KERNEL_LOCK(1, NULL); +#endif + bpf_ops->bpf_mtap(bp, m); +#ifndef NET_MPSAFE + KERNEL_UNLOCK_ONE(NULL); +#endif + m_freem(m); + } +} + +void +bpf_mtap_softint(struct ifnet *ifp, struct mbuf *m) +{ + struct bpf_if *bp = ifp->if_bpf; + struct mbuf *dup; + + KASSERT(cpu_intr_p()); + + if (bp == NULL || bp->bif_dlist == NULL) + return; + KASSERT(bp->bif_si != NULL); + + dup = bpf_mbuf_enqueue(bp, m); + if (dup != NULL) + softint_schedule(bp->bif_si); +} + static int bpf_hdrlen(struct bpf_d *d) { @@ -1790,6 +1879,7 @@ _bpfattach(struct ifnet *ifp, u_int dlt, bp->bif_driverp = driverp; bp->bif_ifp = ifp; bp->bif_dlt = dlt; + bp->bif_si = NULL; bp->bif_next = bpf_iflist; bpf_iflist = bp; @@ -1803,6 +1893,29 @@ _bpfattach(struct ifnet *ifp, u_int dlt, #endif } +void +bpf_mtap_softint_init(struct ifnet *ifp) +{ + struct bpf_if *bp; + + mutex_enter(&bpf_mtx); + for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) { + if (bp->bif_ifp != ifp) + continue; + + bp->bif_mbuf_head = NULL; + bp->bif_mbuf_tail = NULL; + bp->bif_si = softint_establish(SOFTINT_NET, bpf_mtap_si, bp); + if (bp->bif_si == NULL) + panic("%s: softint_establish() failed", __func__); + break; + } + mutex_exit(&bpf_mtx); + + if (bp == NULL) + panic("%s: no bpf_if found for %s", __func__, ifp->if_xname); +} + /* * Remove an interface from bpf. */ @@ -1833,6 +1946,16 @@ _bpfdetach(struct ifnet *ifp) bp != NULL; pbp = &bp->bif_next, bp = bp->bif_next) { if (bp->bif_ifp == ifp) { *pbp = bp->bif_next; + if (bp->bif_si != NULL) { + s = splnet(); + while (bp->bif_mbuf_head != NULL) { + struct mbuf *m = bp->bif_mbuf_head; + bp->bif_mbuf_head = m->m_nextpkt; + m_freem(m); + } + splx(s); + softint_disestablish(bp->bif_si); + } free(bp, M_DEVBUF); goto again; } Index: src/sys/net/bpf.h diff -u src/sys/net/bpf.h:1.67 src/sys/net/bpf.h:1.68 --- src/sys/net/bpf.h:1.67 Sat Sep 5 20:01:21 2015 +++ src/sys/net/bpf.h Tue Jan 24 09:05:28 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: bpf.h,v 1.67 2015/09/05 20:01:21 dholland Exp $ */ +/* $NetBSD: bpf.h,v 1.68 2017/01/24 09:05:28 ozaki-r Exp $ */ /* * Copyright (c) 1990, 1991, 1993 @@ -517,6 +517,9 @@ int bpf_validate_ext(const bpf_ctx_t *, bpfjit_func_t bpf_jit_generate(bpf_ctx_t *, void *, size_t); void bpf_jit_freecode(bpfjit_func_t); +void bpf_mtap_softint_init(struct ifnet *); +void bpf_mtap_softint(struct ifnet *, struct mbuf *); + #endif int bpf_validate(const struct bpf_insn *, int); Index: src/sys/net/bpfdesc.h diff -u src/sys/net/bpfdesc.h:1.39 src/sys/net/bpfdesc.h:1.40 --- src/sys/net/bpfdesc.h:1.39 Mon Jan 23 10:17:36 2017 +++ src/sys/net/bpfdesc.h Tue Jan 24 09:05:28 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: bpfdesc.h,v 1.39 2017/01/23 10:17:36 ozaki-r Exp $ */ +/* $NetBSD: bpfdesc.h,v 1.40 2017/01/24 09:05:28 ozaki-r Exp $ */ /* * Copyright (c) 1990, 1991, 1993 @@ -139,6 +139,9 @@ struct bpf_if { u_int bif_dlt; /* link layer type */ u_int bif_hdrlen; /* length of header (with padding) */ struct ifnet *bif_ifp; /* corresponding interface */ + void *bif_si; + struct mbuf *bif_mbuf_head; + struct mbuf *bif_mbuf_tail; }; #endif /* !_NET_BPFDESC_H_ */ Index: src/sys/netisdn/i4b_ipr.c diff -u src/sys/netisdn/i4b_ipr.c:1.40 src/sys/netisdn/i4b_ipr.c:1.41 --- src/sys/netisdn/i4b_ipr.c:1.40 Thu Dec 15 09:35:24 2016 +++ src/sys/netisdn/i4b_ipr.c Tue Jan 24 09:05:28 2017 @@ -27,7 +27,7 @@ * i4b_ipr.c - isdn4bsd IP over raw HDLC ISDN network driver * --------------------------------------------------------- * - * $Id: i4b_ipr.c,v 1.40 2016/12/15 09:35:24 ozaki-r Exp $ + * $Id: i4b_ipr.c,v 1.41 2017/01/24 09:05:28 ozaki-r Exp $ * * $FreeBSD$ * @@ -59,7 +59,7 @@ *---------------------------------------------------------------------------*/ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: i4b_ipr.c,v 1.40 2016/12/15 09:35:24 ozaki-r Exp $"); +__KERNEL_RCSID(0, "$NetBSD: i4b_ipr.c,v 1.41 2017/01/24 09:05:28 ozaki-r Exp $"); #include "irip.h" #include "opt_irip.h" @@ -406,6 +406,7 @@ iripattach(void) bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int)); #else bpf_attach(&sc->sc_if, DLT_NULL, sizeof(u_int)); + bpf_mtap_softint_init(&sc->sc_if); #endif #endif } @@ -1070,7 +1071,7 @@ error: mm.m_len = 4; mm.m_data = (char *)⁡ - bpf_mtap(&sc->sc_if, &mm); /* XXX not in softint */ + bpf_mtap_softint(&sc->sc_if, &mm); } #endif /* NBPFILTER > 0 || NBPF > 0 */ Added files: Index: src/doc/TODO.smpnet diff -u /dev/null src/doc/TODO.smpnet:1.1 --- /dev/null Tue Jan 24 09:05:28 2017 +++ src/doc/TODO.smpnet Tue Jan 24 09:05:27 2017 @@ -0,0 +1,28 @@ +$NetBSD: TODO.smpnet,v 1.1 2017/01/24 09:05:27 ozaki-r Exp $ + +Non MP-safe components +====================== + + - bpf + - To be listed more... + +bpf +=== + +MP-ification of bpf requires all of bpf_mtap* are called in normal LWP context +or softint context, i.e., not in hardware interrupt context. For Tx, all +bpf_mtap satisfy the requrement. For Rx, most of bpf_mtap are called in softint. +Unfortunately some bpf_mtap on Rx are still called in hardware interrupt context. + +This is the list of the functions that have such bpf_mtap: + + - sca_frame_process() @ sys/dev/ic/hd64570.c + - en_intr() @ sys/dev/ic/midway.c + - rxintr_cleanup() and txintr_cleanup() @ sys/dev/pci/if_lmc.c + - ipr_rx_data_rdy() @ sys/netisdn/i4b_ipr.c + +Ideally we should make the functions run in softint somehow, but we don't have +actual devices, no time (or interest/love) to work on the task, so instead we +provide a deferred bpf_mtap mechanism that forcibly runs bpf_mtap in softint +context. It's a workaround and once the functions run in softint, we should use +the original bpf_mtap again.