Here's the big scary diff I've been using for some months now to stop
grabbing the KERNEL_LOCK() in bpf_mtap(9).  It was originally written
to prevent a lock ordering problem inside pf_test().  Now that we're
heading toward using a rwlock, we won't have this problem, but fewer
uses of the KERNEL_LOCK() are still interesting.

I'm going to split this diff into small chunks to ease review.  But
I'd appreciate it if people could try to break it, test & report back.

Some notes:

  - Now that selwakeup() is called in a thread context (task), we only
    rely on the KERNEL_LOCK() to serialize access to kqueue(9) data.

  - The reference counting is here to make sure a descriptor is not
    freed during a sleep.  That's why the KERNEL_LOCK() is still
    necessary in the slow path.  On the other hand, bpf_catchpacket()
    relies on the reference guaranteed by the SRP list.

  - A mutex now protects the rotating buffers and their associated
    fields.  It is dropped before calling ifpromisc() because USB
    devices may sleep; see the first sketch below.

  - The dance around uiomove(9) is here to ensure that buffers aren't
    rotated while data is copied to userland; see the second sketch
    below.  Setting ``d->bd_fbuf'' to NULL should be enough to make
    bpf_catchpacket() drop the packet.  But I added ``__in_uiomove''
    to get a usable panic if something weird happens.
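
Here is a minimal sketch of the sleeping pattern, condensed from
bpf_detachd() in the diff below (error handling omitted):

        /*
         * Take a reference so that ``d'' cannot be freed while we
         * sleep, and release the mutex around the sleeping call.
         */
        bpf_get(d);
        mtx_leave(&d->bd_mtx);
        error = ifpromisc(bp->bif_ifp, 0);      /* may sleep (USB) */
        mtx_enter(&d->bd_mtx);
        bpf_put(d);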

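And the uiomove(9) dance itself, condensed from bpfread() in the diff
below (error paths omitted):

        /* Steal the hold buffer while the mutex is held. */
        hbuf = d->bd_hbuf;
        hlen = d->bd_hlen;
        d->bd_hbuf = NULL;
        d->bd_hlen = 0;
        d->bd_fbuf = NULL;      /* bpf_catchpacket() now drops instead
                                   of rotating into a free buffer */
        d->__in_uiomove = 1;

        mtx_leave(&d->bd_mtx);  /* uiomove(9) may sleep */
        error = uiomove(hbuf, hlen, uio);
        mtx_enter(&d->bd_mtx);

        /* Nobody may rotate or reset the buffers in between. */
        KASSERT(d->bd_fbuf == NULL);
        KASSERT(d->bd_hbuf == NULL);
        d->bd_fbuf = hbuf;      /* return the buffer to the free slot */
        d->__in_uiomove = 0;
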
Comments?

Index: net/bpf.c
===================================================================
RCS file: /cvs/src/sys/net/bpf.c,v
retrieving revision 1.149
diff -u -p -r1.149 bpf.c
--- net/bpf.c   12 Sep 2016 16:24:37 -0000      1.149
+++ net/bpf.c   13 Sep 2016 09:56:18 -0000
@@ -92,15 +92,13 @@ int bpf_maxbufsize = BPF_MAXBUFSIZE;
 struct bpf_if  *bpf_iflist;
 LIST_HEAD(, bpf_d) bpf_d_list;
 
-void   bpf_allocbufs(struct bpf_d *);
+int    bpf_allocbufs(struct bpf_d *);
 void   bpf_ifname(struct ifnet *, struct ifreq *);
 int    _bpf_mtap(caddr_t, const struct mbuf *, u_int,
            void (*)(const void *, void *, size_t));
 void   bpf_mcopy(const void *, void *, size_t);
 int    bpf_movein(struct uio *, u_int, struct mbuf **,
            struct sockaddr *, struct bpf_insn *);
-void   bpf_attachd(struct bpf_d *, struct bpf_if *);
-void   bpf_detachd(struct bpf_d *);
 int    bpf_setif(struct bpf_d *, struct ifreq *);
 int    bpfpoll(dev_t, int, struct proc *);
 int    bpfkqfilter(dev_t, struct knote *);
@@ -108,7 +106,6 @@ void        bpf_wakeup(struct bpf_d *);
 void   bpf_wakeup_cb(void *);
 void   bpf_catchpacket(struct bpf_d *, u_char *, size_t, size_t,
            void (*)(const void *, void *, size_t), struct timeval *);
-void   bpf_reset_d(struct bpf_d *);
 int    bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
 int    bpf_setdlt(struct bpf_d *, u_int);
 
@@ -120,6 +117,13 @@ int        bpf_sysctl_locked(int *, u_int, void
 struct bpf_d *bpfilter_lookup(int);
 
 /*
+ * Called holding ``bd_mtx''.
+ */
+void   bpf_attachd(struct bpf_d *, struct bpf_if *);
+void   bpf_detachd(struct bpf_d *);
+void   bpf_resetd(struct bpf_d *);
+
+/*
  * Reference count access to descriptor buffers
  */
 void   bpf_get(struct bpf_d *);
@@ -259,11 +263,12 @@ bpf_movein(struct uio *uio, u_int linkty
 
 /*
  * Attach file to the bpf interface, i.e. make d listen on bp.
- * Must be called at splnet.
  */
 void
 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
 {
+       MUTEX_ASSERT_LOCKED(&d->bd_mtx);
+
        /*
         * Point d at bp, and add d to the interface's list of listeners.
         * Finally, point the driver's bpf cookie at the interface so
@@ -286,7 +291,23 @@ bpf_detachd(struct bpf_d *d)
 {
        struct bpf_if *bp;
 
+       MUTEX_ASSERT_LOCKED(&d->bd_mtx);
+
        bp = d->bd_bif;
+
+       /* Remove d from the interface's descriptor list. */
+       KERNEL_ASSERT_LOCKED();
+       SRPL_REMOVE_LOCKED(&bpf_d_rc, &bp->bif_dlist, d, bpf_d, bd_next);
+
+       if (SRPL_EMPTY_LOCKED(&bp->bif_dlist)) {
+               /*
+                * Let the driver know that there are no more listeners.
+                */
+               *bp->bif_driverp = NULL;
+       }
+
+       d->bd_bif = NULL;
+
        /*
         * Check if this descriptor had requested promiscuous mode.
         * If so, turn it off.
@@ -295,7 +316,13 @@ bpf_detachd(struct bpf_d *d)
                int error;
 
                d->bd_promisc = 0;
+
+               bpf_get(d);
+               mtx_leave(&d->bd_mtx);
                error = ifpromisc(bp->bif_ifp, 0);
+               mtx_enter(&d->bd_mtx);
+               bpf_put(d);
+
                if (error && !(error == EINVAL || error == ENODEV))
                        /*
                         * Something is really wrong if we were able to put
@@ -304,19 +331,6 @@ bpf_detachd(struct bpf_d *d)
                         */
                        panic("bpf: ifpromisc failed");
        }
-
-       /* Remove d from the interface's descriptor list. */
-       KERNEL_ASSERT_LOCKED();
-       SRPL_REMOVE_LOCKED(&bpf_d_rc, &bp->bif_dlist, d, bpf_d, bd_next);
-
-       if (SRPL_EMPTY_LOCKED(&bp->bif_dlist)) {
-               /*
-                * Let the driver know that there are no more listeners.
-                */
-               *d->bd_bif->bif_driverp = 0;
-       }
-
-       d->bd_bif = NULL;
 }
 
 void
@@ -348,6 +362,7 @@ bpfopen(dev_t dev, int flag, int mode, s
        bd->bd_unit = unit;
        bd->bd_bufsize = bpf_bufsize;
        bd->bd_sig = SIGIO;
+       mtx_init(&bd->bd_mtx, IPL_NET);
        task_set(&bd->bd_wake_task, bpf_wakeup_cb, bd);
 
        if (flag & FNONBLOCK)
@@ -367,16 +382,15 @@ int
 bpfclose(dev_t dev, int flag, int mode, struct proc *p)
 {
        struct bpf_d *d;
-       int s;
 
        d = bpfilter_lookup(minor(dev));
-       s = splnet();
-       if (d->bd_bif)
+       mtx_enter(&d->bd_mtx);
+       if (d->bd_bif != NULL)
                bpf_detachd(d);
        bpf_wakeup(d);
        LIST_REMOVE(d, bd_list);
+       mtx_leave(&d->bd_mtx);
        bpf_put(d);
-       splx(s);
 
        return (0);
 }
@@ -387,11 +401,13 @@ bpfclose(dev_t dev, int flag, int mode, 
  * Zero the length of the new store buffer.
  */
 #define ROTATE_BUFFERS(d) \
+       KASSERT((d)->__in_uiomove == 0); \
+       MUTEX_ASSERT_LOCKED(&(d)->bd_mtx); \
        (d)->bd_hbuf = (d)->bd_sbuf; \
        (d)->bd_hlen = (d)->bd_slen; \
        (d)->bd_sbuf = (d)->bd_fbuf; \
        (d)->bd_slen = 0; \
-       (d)->bd_fbuf = 0;
+       (d)->bd_fbuf = NULL;
 /*
  *  bpfread - read next chunk of packets from buffers
  */
@@ -399,23 +415,26 @@ int
 bpfread(dev_t dev, struct uio *uio, int ioflag)
 {
        struct bpf_d *d;
-       int error;
-       int s;
+       caddr_t hbuf;
+       int hlen, error;
+
+       KERNEL_ASSERT_LOCKED();
 
        d = bpfilter_lookup(minor(dev));
        if (d->bd_bif == NULL)
                return (ENXIO);
 
+       bpf_get(d);
+       mtx_enter(&d->bd_mtx);
+
        /*
         * Restrict application to use a buffer the same size as
         * as kernel buffers.
         */
-       if (uio->uio_resid != d->bd_bufsize)
-               return (EINVAL);
-
-       s = splnet();
-
-       bpf_get(d);
+       if (uio->uio_resid != d->bd_bufsize) {
+               error = EINVAL;
+               goto out;
+       }
 
        /*
         * If there's a timeout, bd_rdStart is tagged when we start the read.
@@ -431,13 +450,12 @@ bpfread(dev_t dev, struct uio *uio, int 
         * ends when the timeout expires or when enough packets
         * have arrived to fill the store buffer.
         */
-       while (d->bd_hbuf == 0) {
+       while (d->bd_hbuf == NULL) {
                if (d->bd_bif == NULL) {
                        /* interface is gone */
                        if (d->bd_slen == 0) {
-                               bpf_put(d);
-                               splx(s);
-                               return (EIO);
+                               error = EIO;
+                               goto out;
                        }
                        ROTATE_BUFFERS(d);
                        break;
@@ -456,23 +474,20 @@ bpfread(dev_t dev, struct uio *uio, int 
                        error = EWOULDBLOCK;
                } else {
                        if ((d->bd_rdStart + d->bd_rtout) < ticks) {
-                               error = tsleep((caddr_t)d, PRINET|PCATCH, "bpf",
-                                   d->bd_rtout);
+                               error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
+                                   "bpf", d->bd_rtout);
                        } else
                                error = EWOULDBLOCK;
                }
-               if (error == EINTR || error == ERESTART) {
-                       bpf_put(d);
-                       splx(s);
-                       return (error);
-               }
+               if (error == EINTR || error == ERESTART)
+                       goto out;
                if (error == EWOULDBLOCK) {
                        /*
                         * On a timeout, return what's in the buffer,
                         * which may be nothing.  If there is something
                         * in the store buffer, we can rotate the buffers.
                         */
-                       if (d->bd_hbuf)
+                       if (d->bd_hbuf != NULL)
                                /*
                                 * We filled up the buffer in between
                                 * getting the timeout and arriving
@@ -481,9 +496,8 @@ bpfread(dev_t dev, struct uio *uio, int 
                                break;
 
                        if (d->bd_slen == 0) {
-                               bpf_put(d);
-                               splx(s);
-                               return (0);
+                               error = 0;
+                               goto out;
                        }
                        ROTATE_BUFFERS(d);
                        break;
@@ -492,22 +506,30 @@ bpfread(dev_t dev, struct uio *uio, int 
        /*
         * At this point, we know we have something in the hold slot.
         */
-       splx(s);
+       hbuf = d->bd_hbuf;
+       hlen = d->bd_hlen;
+       d->bd_hbuf = NULL;
+       d->bd_hlen = 0;
+       d->bd_fbuf = NULL;
+       d->__in_uiomove = 1;
 
        /*
         * Move data from hold buffer into user space.
         * We know the entire buffer is transferred since
         * we checked above that the read buffer is bpf_bufsize bytes.
         */
-       error = uiomove(d->bd_hbuf, d->bd_hlen, uio);
-
-       s = splnet();
-       d->bd_fbuf = d->bd_hbuf;
-       d->bd_hbuf = NULL;
-       d->bd_hlen = 0;
-
+       mtx_leave(&d->bd_mtx);
+       error = uiomove(hbuf, hlen, uio);
+       mtx_enter(&d->bd_mtx);
+
+       /* Ensure that bpf_resetd() or ROTATE_BUFFERS() haven't been called. */
+       KASSERT(d->bd_fbuf == NULL);
+       KASSERT(d->bd_hbuf == NULL);
+       d->bd_fbuf = hbuf;
+       d->__in_uiomove = 0;
+out:
+       mtx_leave(&d->bd_mtx);
        bpf_put(d);
-       splx(s);
 
        return (error);
 }
@@ -519,6 +541,8 @@ bpfread(dev_t dev, struct uio *uio, int 
 void
 bpf_wakeup(struct bpf_d *d)
 {
+       MUTEX_ASSERT_LOCKED(&d->bd_mtx);
+
        /*
         * As long as csignal() and selwakeup() need to be protected
         * by the KERNEL_LOCK() we have to delay the wakeup to
@@ -552,34 +577,51 @@ bpfwrite(dev_t dev, struct uio *uio, int
        struct mbuf *m;
        struct bpf_program *bf;
        struct bpf_insn *fcode = NULL;
-       int error, s;
        struct sockaddr_storage dst;
+       u_int dlt;
+       int error;
+
+       KERNEL_ASSERT_LOCKED();
 
        d = bpfilter_lookup(minor(dev));
-       if (d->bd_bif == NULL)
-               return (ENXIO);
+       bpf_get(d);
+       mtx_enter(&d->bd_mtx);
+       if (d->bd_bif == NULL) {
+               error = ENXIO;
+               goto out;
+       }
 
        ifp = d->bd_bif->bif_ifp;
 
-       if ((ifp->if_flags & IFF_UP) == 0)
-               return (ENETDOWN);
+       if ((ifp->if_flags & IFF_UP) == 0) {
+               error = ENETDOWN;
+               goto out;
+       }
 
-       if (uio->uio_resid == 0)
-               return (0);
+       if (uio->uio_resid == 0) {
+               error = 0;
+               goto out;
+       }
 
        KERNEL_ASSERT_LOCKED(); /* for accessing bd_wfilter */
        bf = srp_get_locked(&d->bd_wfilter);
        if (bf != NULL)
                fcode = bf->bf_insns;
 
-       error = bpf_movein(uio, d->bd_bif->bif_dlt, &m,
-           (struct sockaddr *)&dst, fcode);
+       dlt = d->bd_bif->bif_dlt;
+
+       mtx_leave(&d->bd_mtx);
+       error = bpf_movein(uio, dlt, &m, (struct sockaddr *)&dst, fcode);
-       if (error)
-               return (error);
+       if (error) {
+               bpf_put(d);
+               return (error);
+       }
+       mtx_enter(&d->bd_mtx);
 
        if (m->m_pkthdr.len > ifp->if_mtu) {
                m_freem(m);
-               return (EMSGSIZE);
+               error = EMSGSIZE;
+               goto out;
        }
 
        m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
@@ -588,23 +628,25 @@ bpfwrite(dev_t dev, struct uio *uio, int
        if (d->bd_hdrcmplt && dst.ss_family == AF_UNSPEC)
                dst.ss_family = pseudo_AF_HDRCMPLT;
 
-       s = splsoftnet();
        error = ifp->if_output(ifp, m, (struct sockaddr *)&dst, NULL);
-       splx(s);
-       /*
-        * The driver frees the mbuf.
-        */
+out:
+       mtx_leave(&d->bd_mtx);
+       bpf_put(d);
+
        return (error);
 }
 
 /*
  * Reset a descriptor by flushing its packet buffer and clearing the
- * receive and drop counts.  Should be called at splnet.
+ * receive and drop counts.
  */
 void
-bpf_reset_d(struct bpf_d *d)
+bpf_resetd(struct bpf_d *d)
 {
-       if (d->bd_hbuf) {
+       MUTEX_ASSERT_LOCKED(&d->bd_mtx);
+       KASSERT(d->__in_uiomove == 0);
+
+       if (d->bd_hbuf != NULL) {
                /* Free the hold buffer. */
                d->bd_fbuf = d->bd_hbuf;
                d->bd_hbuf = NULL;
@@ -638,7 +680,7 @@ int
 bpfioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
 {
        struct bpf_d *d;
-       int s, error = 0;
+       int error = 0;
 
        d = bpfilter_lookup(minor(dev));
        if (d->bd_locked && suser(p, 0) != 0) {
@@ -666,8 +708,9 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t 
                }
        }
 
-       switch (cmd) {
+       bpf_get(d);
 
+       switch (cmd) {
        default:
                error = EINVAL;
                break;
@@ -679,11 +722,11 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t 
                {
                        int n;
 
-                       s = splnet();
+                       mtx_enter(&d->bd_mtx);
                        n = d->bd_slen;
-                       if (d->bd_hbuf)
+                       if (d->bd_hbuf != NULL)
                                n += d->bd_hlen;
-                       splx(s);
+                       mtx_leave(&d->bd_mtx);
 
                        *(int *)addr = n;
                        break;
@@ -709,7 +752,9 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t 
                                *(u_int *)addr = size = bpf_maxbufsize;
                        else if (size < BPF_MINBUFSIZE)
                                *(u_int *)addr = size = BPF_MINBUFSIZE;
+                       mtx_enter(&d->bd_mtx);
                        d->bd_bufsize = size;
+                       mtx_leave(&d->bd_mtx);
                }
                break;
 
@@ -731,9 +776,9 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t 
         * Flush read packet buffer.
         */
        case BIOCFLUSH:
-               s = splnet();
-               bpf_reset_d(d);
-               splx(s);
+               mtx_enter(&d->bd_mtx);
+               bpf_resetd(d);
+               mtx_leave(&d->bd_mtx);
                break;
 
        /*
@@ -745,15 +790,14 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t 
                         * No interface attached yet.
                         */
                        error = EINVAL;
-                       break;
-               }
-               s = splnet();
-               if (d->bd_promisc == 0) {
-                       error = ifpromisc(d->bd_bif->bif_ifp, 1);
-                       if (error == 0)
-                               d->bd_promisc = 1;
+               } else {
+                       if (d->bd_promisc == 0) {
+                               MUTEX_ASSERT_UNLOCKED(&d->bd_mtx);
+                               error = ifpromisc(d->bd_bif->bif_ifp, 1);
+                               if (error == 0)
+                                       d->bd_promisc = 1;
+                       }
                }
-               splx(s);
                break;
 
        /*
@@ -770,30 +814,36 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t 
         * Get device parameters.
         */
        case BIOCGDLT:
+               mtx_enter(&d->bd_mtx);
                if (d->bd_bif == NULL)
                        error = EINVAL;
                else
                        *(u_int *)addr = d->bd_bif->bif_dlt;
+               mtx_leave(&d->bd_mtx);
                break;
 
        /*
         * Set device parameters.
         */
        case BIOCSDLT:
+               mtx_enter(&d->bd_mtx);
                if (d->bd_bif == NULL)
                        error = EINVAL;
                else
                        error = bpf_setdlt(d, *(u_int *)addr);
+               mtx_leave(&d->bd_mtx);
                break;
 
        /*
         * Set interface name.
         */
        case BIOCGETIF:
+               mtx_enter(&d->bd_mtx);
                if (d->bd_bif == NULL)
                        error = EINVAL;
                else
                        bpf_ifname(d->bd_bif->bif_ifp, (struct ifreq *)addr);
+               mtx_leave(&d->bd_mtx);
                break;
 
        /*
@@ -931,6 +981,8 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t 
                *(u_int *)addr = d->bd_sig;
                break;
        }
+
+       bpf_put(d);
        return (error);
 }
 
@@ -945,7 +997,6 @@ bpf_setf(struct bpf_d *d, struct bpf_pro
        struct srp *filter;
        struct bpf_insn *fcode;
        u_int flen, size;
-       int s;
 
        KERNEL_ASSERT_LOCKED();
        filter = wf ? &d->bd_wfilter : &d->bd_rfilter;
@@ -954,9 +1005,9 @@ bpf_setf(struct bpf_d *d, struct bpf_pro
                if (fp->bf_len != 0)
                        return (EINVAL);
                srp_update_locked(&bpf_insn_gc, filter, NULL);
-               s = splnet();
-               bpf_reset_d(d);
-               splx(s);
+               mtx_enter(&d->bd_mtx);
+               bpf_resetd(d);
+               mtx_leave(&d->bd_mtx);
                return (0);
        }
        flen = fp->bf_len;
@@ -981,9 +1032,9 @@ bpf_setf(struct bpf_d *d, struct bpf_pro
 
        srp_update_locked(&bpf_insn_gc, filter, bf);
 
-       s = splnet();
-       bpf_reset_d(d);
-       splx(s);
+       mtx_enter(&d->bd_mtx);
+       bpf_resetd(d);
+       mtx_leave(&d->bd_mtx);
        return (0);
 }
 
@@ -996,7 +1047,7 @@ int
 bpf_setif(struct bpf_d *d, struct ifreq *ifr)
 {
        struct bpf_if *bp, *candidate = NULL;
-       int s;
+       int error = 0;
 
        /*
         * Look through attached interfaces for the named one.
@@ -1012,30 +1063,33 @@ bpf_setif(struct bpf_d *d, struct ifreq 
                        candidate = bp;
        }
 
-       if (candidate != NULL) {
-               /*
-                * Allocate the packet buffers if we need to.
-                * If we're already attached to requested interface,
-                * just flush the buffer.
-                */
-               if (d->bd_sbuf == NULL)
-                       bpf_allocbufs(d);
-               s = splnet();
-               if (candidate != d->bd_bif) {
-                       if (d->bd_bif)
-                               /*
-                                * Detach if attached to something else.
-                                */
-                               bpf_detachd(d);
+       /* Not found. */
+       if (candidate == NULL)
+               return (ENXIO);
 
-                       bpf_attachd(d, candidate);
-               }
-               bpf_reset_d(d);
-               splx(s);
-               return (0);
+       /*
+        * Allocate the packet buffers if we need to.
+        * If we're already attached to requested interface,
+        * just flush the buffer.
+        */
+       mtx_enter(&d->bd_mtx);
+       if (d->bd_sbuf == NULL) {
+               if ((error = bpf_allocbufs(d)))
+                       goto out;
        }
-       /* Not found. */
-       return (ENXIO);
+       if (candidate != d->bd_bif) {
+               if (d->bd_bif)
+                       /*
+                        * Detach if attached to something else.
+                        */
+                       bpf_detachd(d);
+
+               bpf_attachd(d, candidate);
+       }
+       bpf_resetd(d);
+out:
+       mtx_leave(&d->bd_mtx);
+       return (error);
 }
 
 /*
@@ -1054,7 +1108,9 @@ int
 bpfpoll(dev_t dev, int events, struct proc *p)
 {
        struct bpf_d *d;
-       int s, revents;
+       int revents;
+
+       KERNEL_ASSERT_LOCKED();
 
        /*
         * An imitation of the FIONREAD ioctl code.
@@ -1075,7 +1131,7 @@ bpfpoll(dev_t dev, int events, struct pr
        revents = events & (POLLOUT | POLLWRNORM);
 
        if (events & (POLLIN | POLLRDNORM)) {
-               s = splnet();
+               mtx_enter(&d->bd_mtx);
                if (d->bd_hlen != 0 || (d->bd_immediate && d->bd_slen != 0))
                        revents |= events & (POLLIN | POLLRDNORM);
                else {
@@ -1087,7 +1143,7 @@ bpfpoll(dev_t dev, int events, struct pr
                                d->bd_rdStart = ticks;
                        selrecord(p, &d->bd_sel);
                }
-               splx(s);
+               mtx_leave(&d->bd_mtx);
        }
        return (revents);
 }
@@ -1100,9 +1156,11 @@ bpfkqfilter(dev_t dev, struct knote *kn)
 {
        struct bpf_d *d;
        struct klist *klist;
-       int s;
+
+       KERNEL_ASSERT_LOCKED();
 
        d = bpfilter_lookup(minor(dev));
+
        switch (kn->kn_filter) {
        case EVFILT_READ:
                klist = &d->bd_sel.si_note;
@@ -1112,14 +1170,14 @@ bpfkqfilter(dev_t dev, struct knote *kn)
                return (EINVAL);
        }
 
-       kn->kn_hook = d;
-
-       s = splnet();
        bpf_get(d);
+       kn->kn_hook = d;
        SLIST_INSERT_HEAD(klist, kn, kn_selnext);
+
+       mtx_enter(&d->bd_mtx);
        if (d->bd_rtout != -1 && d->bd_rdStart == 0)
                d->bd_rdStart = ticks;
-       splx(s);
+       mtx_leave(&d->bd_mtx);
 
        return (0);
 }
@@ -1128,12 +1186,11 @@ void
 filt_bpfrdetach(struct knote *kn)
 {
        struct bpf_d *d = kn->kn_hook;
-       int s;
 
-       s = splnet();
+       KERNEL_ASSERT_LOCKED();
+
        SLIST_REMOVE(&d->bd_sel.si_note, kn, knote, kn_selnext);
        bpf_put(d);
-       splx(s);
 }
 
 int
@@ -1141,6 +1198,8 @@ filt_bpfread(struct knote *kn, long hint
 {
        struct bpf_d *d = kn->kn_hook;
 
+       KERNEL_ASSERT_LOCKED();
+
        kn->kn_data = d->bd_hlen;
        if (d->bd_immediate)
                kn->kn_data += d->bd_slen;
@@ -1186,7 +1245,6 @@ _bpf_mtap(caddr_t arg, const struct mbuf
        struct timeval tv;
        int gottime = 0;
        int drop = 0;
-       int s;
 
        if (m == NULL)
                return (0);
@@ -1222,12 +1280,10 @@ _bpf_mtap(caddr_t arg, const struct mbuf
                        if (!gottime++)
                                microtime(&tv);
 
-                       KERNEL_LOCK();
-                       s = splnet();
+                       mtx_enter(&d->bd_mtx);
                        bpf_catchpacket(d, (u_char *)m, pktlen, slen, cpfn,
                            &tv);
-                       splx(s);
-                       KERNEL_UNLOCK();
+                       mtx_leave(&d->bd_mtx);
 
                        if (d->bd_fildrop)
                                drop = 1;
@@ -1355,8 +1411,9 @@ bpf_catchpacket(struct bpf_d *d, u_char 
 {
        struct bpf_hdr *hp;
        int totlen, curlen;
-       int hdrlen;
+       int hdrlen, do_wakeup = 0;
 
+       MUTEX_ASSERT_LOCKED(&d->bd_mtx);
        if (d->bd_bif == NULL)
                return;
 
@@ -1391,7 +1448,7 @@ bpf_catchpacket(struct bpf_d *d, u_char 
                        return;
                }
                ROTATE_BUFFERS(d);
-               bpf_wakeup(d);
+               do_wakeup = 1;
                curlen = 0;
        }
 
@@ -1414,7 +1471,7 @@ bpf_catchpacket(struct bpf_d *d, u_char 
                 * Immediate mode is set.  A packet arrived so any
                 * reads should be woken up.
                 */
-               bpf_wakeup(d);
+               do_wakeup = 1;
        }
 
        if (d->bd_rdStart && (d->bd_rtout + d->bd_rdStart < ticks)) {
@@ -1423,24 +1480,39 @@ bpf_catchpacket(struct bpf_d *d, u_char 
                 * may have timeouts set.  We got here by getting
                 * a packet, so wake up the reader.
                 */
-               if (d->bd_fbuf) {
+               if (d->bd_fbuf != NULL) {
                        d->bd_rdStart = 0;
                        ROTATE_BUFFERS(d);
-                       bpf_wakeup(d);
+                       do_wakeup = 1;
                }
        }
+
+       if (do_wakeup)
+               bpf_wakeup(d);
 }
 
 /*
  * Initialize all nonzero fields of a descriptor.
  */
-void
+int
 bpf_allocbufs(struct bpf_d *d)
 {
-       d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK);
-       d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK);
+       MUTEX_ASSERT_LOCKED(&d->bd_mtx);
+
+       d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
+       if (d->bd_fbuf == NULL)
+               return (ENOMEM);
+
+       d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
+       if (d->bd_sbuf == NULL) {
+               free(d->bd_fbuf, M_DEVBUF, d->bd_bufsize);
+               return (ENOMEM);
+       }
+
        d->bd_slen = 0;
        d->bd_hlen = 0;
+
+       return (0);
 }
 
 void
@@ -1591,6 +1663,8 @@ bpfilter_lookup(int unit)
 {
        struct bpf_d *bd;
 
+       KERNEL_ASSERT_LOCKED();
+
        LIST_FOREACH(bd, &bpf_d_list, bd_list)
                if (bd->bd_unit == unit)
                        return (bd);
@@ -1634,10 +1708,10 @@ bpf_getdltlist(struct bpf_d *d, struct b
 int
 bpf_setdlt(struct bpf_d *d, u_int dlt)
 {
-       int s;
        struct ifnet *ifp;
        struct bpf_if *bp;
 
+       MUTEX_ASSERT_LOCKED(&d->bd_mtx);
        if (d->bd_bif->bif_dlt == dlt)
                return (0);
        ifp = d->bd_bif->bif_ifp;
@@ -1647,11 +1721,10 @@ bpf_setdlt(struct bpf_d *d, u_int dlt)
        }
        if (bp == NULL)
                return (EINVAL);
-       s = splnet();
        bpf_detachd(d);
        bpf_attachd(d, bp);
-       bpf_reset_d(d);
-       splx(s);
+       bpf_resetd(d);
+
        return (0);
 }
 
Index: net/bpfdesc.h
===================================================================
RCS file: /cvs/src/sys/net/bpfdesc.h,v
retrieving revision 1.31
diff -u -p -r1.31 bpfdesc.h
--- net/bpfdesc.h       22 Aug 2016 10:40:36 -0000      1.31
+++ net/bpfdesc.h       12 Sep 2016 11:59:49 -0000
@@ -56,15 +56,17 @@ struct bpf_d {
         *   fbuf (free) - When read is done, put cluster here.
         * On receiving, if sbuf is full and fbuf is 0, packet is dropped.
         */
+       struct mutex    bd_mtx;         /* protect buffer slots below */
        caddr_t         bd_sbuf;        /* store slot */
        caddr_t         bd_hbuf;        /* hold slot */
        caddr_t         bd_fbuf;        /* free slot */
        int             bd_slen;        /* current length of store buffer */
        int             bd_hlen;        /* current length of hold buffer */
-
        int             bd_bufsize;     /* absolute length of buffers */
 
-       struct bpf_if * bd_bif;         /* interface descriptor */
+       int             __in_uiomove;   /* uiomove(9) in progress */
+
+       struct bpf_if  *bd_bif;         /* interface descriptor */
        u_long          bd_rtout;       /* Read timeout in 'ticks' */
        u_long          bd_rdStart;     /* when the read started */
        struct srp      bd_rfilter;     /* read filter code */
