The branch main has been updated by glebius:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=8774a990ee4094f16d596d4b78e0f3239e5d0c88

commit 8774a990ee4094f16d596d4b78e0f3239e5d0c88
Author:     Gleb Smirnoff <[email protected]>
AuthorDate: 2025-12-15 20:50:35 +0000
Commit:     Gleb Smirnoff <[email protected]>
CommitDate: 2025-12-15 20:50:35 +0000

    bpf: modularize ifnet(9) part of bpf
    
    Imagine that bpf(9) tapping can happen at any point in the network stack,
    not necessarily at interface transmit or receive.  To achieve that we need
    a thin layer of abstraction defined by struct bif_methods, that defines
    how generic bpf layer works with a tap point of this kind.
    
    Implement ifnet(9) specific methods in a separate file bpf_ifnet.c.  At
    this point there is 100% compatibility for all existing interfaces, there
    is no KPI change, yet.  The legacy attaching KPI is layered over new ifnet
    agnostic KPI.  The new KPI may change though, as we can implement multiple
    DLTs per single tap point in a prettier fashion.
    
    The new abstraction layer allows us to move all the 802.11 radio injection
    hacks out of bpf.c into ieee80211_radiotap.c, so do that immediately as a
    good proof of concept.
    
    Reviewed by:            bz
    Differential Revision:  https://reviews.freebsd.org/D53872
---
 sys/conf/files                    |   1 +
 sys/net/bpf.c                     | 718 ++++++++++++++------------------------
 sys/net/bpf.h                     |  28 +-
 sys/net/bpf_ifnet.c               | 257 ++++++++++++++
 sys/net80211/ieee80211_freebsd.c  |  31 --
 sys/net80211/ieee80211_radiotap.c | 128 ++++++-
 6 files changed, 652 insertions(+), 511 deletions(-)

diff --git a/sys/conf/files b/sys/conf/files
index df939f5788ca..aa0d05df45c7 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -4189,6 +4189,7 @@ net/bpf_buffer.c          optional bpf
 net/bpf_jitter.c               optional bpf_jitter
 net/bpf_filter.c               optional bpf | netgraph_bpf
 net/bpf_zerocopy.c             optional bpf
+net/bpf_ifnet.c                        optional bpf
 net/bridgestp.c                        optional bridge | if_bridge
 net/dummymbuf.c                        optional dummymbuf
 net/ieee8023ad_lacp.c          optional lagg
diff --git a/sys/net/bpf.c b/sys/net/bpf.c
index 855242728e16..0155b27b4919 100644
--- a/sys/net/bpf.c
+++ b/sys/net/bpf.c
@@ -41,7 +41,6 @@
 
 #include <sys/param.h>
 #include <sys/conf.h>
-#include <sys/eventhandler.h>
 #include <sys/fcntl.h>
 #include <sys/jail.h>
 #include <sys/ktr.h>
@@ -60,7 +59,6 @@
 #include <sys/sysent.h>
 #include <sys/systm.h>
 
-#include <sys/event.h>
 #include <sys/file.h>
 #include <sys/poll.h>
 #include <sys/proc.h>
@@ -71,7 +69,6 @@
 #include <net/if_var.h>
 #include <net/if_private.h>
 #include <net/if_vlan_var.h>
-#include <net/if_dl.h>
 #include <net/bpf.h>
 #include <net/bpf_buffer.h>
 #ifdef BPF_JITTER
@@ -79,31 +76,25 @@
 #endif
 #include <net/bpf_zerocopy.h>
 #include <net/bpfdesc.h>
-#include <net/route.h>
 #include <net/vnet.h>
 
-#include <netinet/in.h>
-#include <netinet/if_ether.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 
-#include <net80211/ieee80211_freebsd.h>
-
 #include <security/mac/mac_framework.h>
 
 MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
 
-static const struct bpfd_list dead_bpf_if = CK_LIST_HEAD_INITIALIZER();
-
 struct bpf_if {
        struct bpfd_list        bif_dlist;      /* list of all interfaces */
        LIST_ENTRY(bpf_if)      bif_next;       /* descriptor list */
        u_int           bif_dlt;        /* link layer type */
        u_int           bif_hdrlen;     /* length of link header */
-       struct bpfd_list bif_wlist;     /* writer-only list */
-       struct ifnet    *bif_ifp;       /* corresponding interface */
-       struct bpf_if   **bif_bpf;      /* Pointer to pointer to us */
        volatile u_int  bif_refcnt;
+       struct bpfd_list bif_wlist;     /* writer-only list */
+       const struct bif_methods        *bif_methods;
+       void            *bif_softc;
+       const char      *bif_name;
        struct epoch_context epoch_ctx;
 };
 
@@ -111,6 +102,61 @@ struct bpf_if {
 _Static_assert(offsetof(struct bpf_if, bif_dlist) == 0,
     "bpf_if shall start with bif_dlist");
 
+static inline void
+bif_attachd(struct bpf_if *bp)
+{
+       if (bp->bif_methods->bif_attachd != NULL)
+               bp->bif_methods->bif_attachd(bp->bif_softc);
+}
+
+static inline void
+bif_detachd(struct bpf_if *bp)
+{
+       if (bp->bif_methods->bif_detachd != NULL)
+               bp->bif_methods->bif_detachd(bp->bif_softc);
+}
+
+static inline uint32_t
+bif_wrsize(struct bpf_if *bp)
+{
+       if (bp->bif_methods->bif_wrsize != NULL)
+               return (bp->bif_methods->bif_wrsize(bp->bif_softc));
+       else
+               return (0);
+}
+
+static inline int
+bif_promisc(struct bpf_if *bp, bool on)
+{
+       if (bp->bif_methods->bif_promisc != NULL)
+               return (bp->bif_methods->bif_promisc(bp->bif_softc, on));
+       else
+               return (0);
+}
+
+#ifdef MAC
+static inline int
+bif_mac_check_receive(struct bpf_if *bp, struct bpf_d *d)
+{
+       if (bp->bif_methods->bif_mac_check_receive != NULL)
+               return (bp->bif_methods->bif_mac_check_receive(bp->bif_softc,
+                   d));
+       else
+               return (0);
+}
+#endif
+
+/*
+ * XXXGL: Once we migrate to tapping KPI that would specify packet direction
+ * we no longer need bif_chkdir method.
+ */
+static inline bool
+bpf_chkdir(struct bpf_d *d, struct mbuf *m)
+{
+       return (d->bd_bif->bif_methods->bif_chkdir(d->bd_bif->bif_softc, m,
+           d->bd_direction));
+}
+
 struct bpf_program_buffer {
        struct epoch_context    epoch_ctx;
 #ifdef BPF_JITTER
@@ -184,12 +230,9 @@ static void        bpfif_rele(struct bpf_if *);
 
 static void    bpfd_ref(struct bpf_d *);
 static void    bpfd_rele(struct bpf_d *);
-static void    bpf_attachd(struct bpf_d *, struct bpf_if *);
+static int     bpf_attachd(struct bpf_d *d, struct bpf_if *);
 static void    bpf_detachd(struct bpf_d *, bool);
 static void    bpfd_free(epoch_context_t);
-static int     bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
-                   struct sockaddr *, int *, struct bpf_d *);
-static int     bpf_setif(struct bpf_d *, struct ifreq *);
 static void    bpf_timed_out(void *);
 static __inline void
                bpf_wakeup(struct bpf_d *);
@@ -298,7 +341,6 @@ bpfif_free(epoch_context_t ctx)
        struct bpf_if *bp;
 
        bp = __containerof(ctx, struct bpf_if, epoch_ctx);
-       if_rele(bp->bif_ifp);
        free(bp, M_BPF);
 }
 
@@ -552,221 +594,6 @@ bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, 
struct bpf_zbuf *bz)
        return (bpf_zerocopy_ioctl_setzbuf(td, d, bz));
 }
 
-/*
- * General BPF functions.
- */
-static int
-bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
-    struct sockaddr *sockp, int *hdrlen, struct bpf_d *d)
-{
-       const struct ieee80211_bpf_params *p;
-       struct ether_header *eh;
-       struct mbuf *m;
-       int error;
-       int len;
-       int hlen;
-       int slen;
-
-       /*
-        * Build a sockaddr based on the data link layer type.
-        * We do this at this level because the ethernet header
-        * is copied directly into the data field of the sockaddr.
-        * In the case of SLIP, there is no header and the packet
-        * is forwarded as is.
-        * Also, we are careful to leave room at the front of the mbuf
-        * for the link level header.
-        */
-       switch (linktype) {
-       case DLT_SLIP:
-               sockp->sa_family = AF_INET;
-               hlen = 0;
-               break;
-
-       case DLT_EN10MB:
-               sockp->sa_family = AF_UNSPEC;
-               /* XXX Would MAXLINKHDR be better? */
-               hlen = ETHER_HDR_LEN;
-               break;
-
-       case DLT_FDDI:
-               sockp->sa_family = AF_IMPLINK;
-               hlen = 0;
-               break;
-
-       case DLT_RAW:
-               sockp->sa_family = AF_UNSPEC;
-               hlen = 0;
-               break;
-
-       case DLT_NULL:
-               /*
-                * null interface types require a 4 byte pseudo header which
-                * corresponds to the address family of the packet.
-                */
-               sockp->sa_family = AF_UNSPEC;
-               hlen = 4;
-               break;
-
-       case DLT_ATM_RFC1483:
-               /*
-                * en atm driver requires 4-byte atm pseudo header.
-                * though it isn't standard, vpi:vci needs to be
-                * specified anyway.
-                */
-               sockp->sa_family = AF_UNSPEC;
-               hlen = 12;      /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
-               break;
-
-       case DLT_PPP:
-               sockp->sa_family = AF_UNSPEC;
-               hlen = 4;       /* This should match PPP_HDRLEN */
-               break;
-
-       case DLT_IEEE802_11:            /* IEEE 802.11 wireless */
-               sockp->sa_family = AF_IEEE80211;
-               hlen = 0;
-               break;
-
-       case DLT_IEEE802_11_RADIO:      /* IEEE 802.11 wireless w/ phy params */
-               sockp->sa_family = AF_IEEE80211;
-               sockp->sa_len = 12;     /* XXX != 0 */
-               hlen = sizeof(struct ieee80211_bpf_params);
-               break;
-
-       default:
-               return (EIO);
-       }
-
-       len = uio->uio_resid;
-       if (len < hlen || len - hlen > ifp->if_mtu)
-               return (EMSGSIZE);
-
-       /* Allocate a mbuf, up to MJUM16BYTES bytes, for our write. */
-       m = m_get3(len, M_WAITOK, MT_DATA, M_PKTHDR);
-       if (m == NULL)
-               return (EIO);
-       m->m_pkthdr.len = m->m_len = len;
-       *mp = m;
-
-       error = uiomove(mtod(m, u_char *), len, uio);
-       if (error)
-               goto bad;
-
-       slen = bpf_filter(d->bd_wfilter, mtod(m, u_char *), len, len);
-       if (slen == 0) {
-               error = EPERM;
-               goto bad;
-       }
-
-       /* Check for multicast destination */
-       switch (linktype) {
-       case DLT_EN10MB:
-               eh = mtod(m, struct ether_header *);
-               if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
-                       if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
-                           ETHER_ADDR_LEN) == 0)
-                               m->m_flags |= M_BCAST;
-                       else
-                               m->m_flags |= M_MCAST;
-               }
-               if (!(d->bd_flags & BPFD_HDRCMPLT)) {
-                       memcpy(eh->ether_shost, IF_LLADDR(ifp),
-                           sizeof(eh->ether_shost));
-               }
-               break;
-       }
-
-       /*
-        * Make room for link header, and copy it to sockaddr
-        */
-       if (hlen != 0) {
-               if (sockp->sa_family == AF_IEEE80211) {
-                       /*
-                        * Collect true length from the parameter header
-                        * NB: sockp is known to be zero'd so if we do a
-                        *     short copy unspecified parameters will be
-                        *     zero.
-                        * NB: packet may not be aligned after stripping
-                        *     bpf params
-                        * XXX check ibp_vers
-                        */
-                       p = mtod(m, const struct ieee80211_bpf_params *);
-                       hlen = p->ibp_len;
-                       if (hlen > sizeof(sockp->sa_data)) {
-                               error = EINVAL;
-                               goto bad;
-                       }
-               }
-               bcopy(mtod(m, const void *), sockp->sa_data, hlen);
-       }
-       *hdrlen = hlen;
-
-       return (0);
-bad:
-       m_freem(m);
-       return (error);
-}
-
-/*
- * Attach descriptor to the bpf interface, i.e. make d listen on bp,
- * then reset its buffers and counters with reset_d().
- */
-static void
-bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
-{
-       int op_w;
-
-       BPF_LOCK_ASSERT();
-
-       /*
-        * Save sysctl value to protect from sysctl change
-        * between reads
-        */
-       op_w = V_bpf_optimize_writers || d->bd_writer;
-
-       if (d->bd_bif != NULL)
-               bpf_detachd(d, false);
-       /*
-        * Point d at bp, and add d to the interface's list.
-        * Since there are many applications using BPF for
-        * sending raw packets only (dhcpd, cdpd are good examples)
-        * we can delay adding d to the list of active listeners until
-        * some filter is configured.
-        */
-
-       BPFD_LOCK(d);
-       /*
-        * Hold reference to bpif while descriptor uses this interface.
-        */
-       bpfif_ref(bp);
-       d->bd_bif = bp;
-       if (op_w != 0) {
-               /* Add to writers-only list */
-               CK_LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next);
-               /*
-                * We decrement bd_writer on every filter set operation.
-                * First BIOCSETF is done by pcap_open_live() to set up
-                * snap length. After that appliation usually sets its own
-                * filter.
-                */
-               d->bd_writer = 2;
-       } else
-               CK_LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
-
-       reset_d(d);
-
-       /* Trigger EVFILT_WRITE events. */
-       bpf_wakeup(d);
-
-       BPFD_UNLOCK(d);
-
-       CTR3(KTR_NET, "%s: bpf_attach called by pid %d, adding to %s list",
-           __func__, d->bd_pid, d->bd_writer ? "writer" : "active");
-
-       if (op_w == 0)
-               EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
-}
-
 /*
  * Check if we need to upgrade our descriptor @d from write-only mode.
  */
@@ -837,8 +664,7 @@ static void
 bpf_detachd(struct bpf_d *d, bool detached_ifp)
 {
        struct bpf_if *bp;
-       struct ifnet *ifp;
-       int error;
+       bool writer;
 
        BPF_LOCK_ASSERT();
        CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid);
@@ -848,11 +674,8 @@ bpf_detachd(struct bpf_d *d, bool detached_ifp)
                return;
 
        BPFD_LOCK(d);
-       /* Remove d from the interface's descriptor list. */
        CK_LIST_REMOVE(d, bd_next);
-       /* Save bd_writer value */
-       error = d->bd_writer;
-       ifp = bp->bif_ifp;
+       writer = (d->bd_writer > 0);
        d->bd_bif = NULL;
        if (detached_ifp) {
                /*
@@ -863,30 +686,14 @@ bpf_detachd(struct bpf_d *d, bool detached_ifp)
        }
        BPFD_UNLOCK(d);
 
-       /* Call event handler iff d is attached */
-       if (error == 0)
-               EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);
+       if (!writer)
+               bif_detachd(bp);
 
-       /*
-        * Check if this descriptor had requested promiscuous mode.
-        * If so and ifnet is not detached, turn it off.
-        */
        if (d->bd_promisc && !detached_ifp) {
                d->bd_promisc = 0;
-               CURVNET_SET(ifp->if_vnet);
-               error = ifpromisc(ifp, 0);
-               CURVNET_RESTORE();
-               if (error != 0 && error != ENXIO) {
-                       /*
-                        * ENXIO can happen if a pccard is unplugged
-                        * Something is really wrong if we were able to put
-                        * the driver into promiscuous mode, but can't
-                        * take it out.
-                        */
-                       if_printf(bp->bif_ifp,
-                               "bpf_detach: ifpromisc failed (%d)\n", error);
-               }
+               (void)bif_promisc(bp, false);
        }
+
        bpfif_rele(bp);
 }
 
@@ -1164,128 +971,96 @@ bpf_ready(struct bpf_d *d)
 static int
 bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
 {
-       struct route ro;
-       struct sockaddr dst;
        struct epoch_tracker et;
        struct bpf_if *bp;
        struct bpf_d *d;
-       struct ifnet *ifp;
        struct mbuf *m, *mc;
-       int error, hlen;
+       ssize_t len;
+       int error;
 
        error = devfs_get_cdevpriv((void **)&d);
        if (error != 0)
                return (error);
 
-       NET_EPOCH_ENTER(et);
+       if (uio->uio_resid == 0)
+               return (0);
+
        BPFD_LOCK(d);
-       BPF_PID_REFRESH_CUR(d);
-       counter_u64_add(d->bd_wcount, 1);
-       if ((bp = d->bd_bif) == NULL) {
+       if ((bp = d->bd_bif) == NULL)
                error = ENXIO;
-               goto out_locked;
-       }
-
-       ifp = bp->bif_ifp;
-       if ((ifp->if_flags & IFF_UP) == 0) {
-               error = ENETDOWN;
-               goto out_locked;
+       else if (bp->bif_methods->bif_write == NULL)
+               error = EOPNOTSUPP;
+       if (error) {
+               BPFD_UNLOCK(d);
+               counter_u64_add(d->bd_wdcount, 1);
+               return (error);
        }
-
-       if (uio->uio_resid == 0)
-               goto out_locked;
-
-       bzero(&dst, sizeof(dst));
-       m = NULL;
-       hlen = 0;
-
-       /*
-        * Take extra reference, unlock d and exit from epoch section,
-        * since bpf_movein() can sleep.
-        */
        bpfd_ref(d);
-       NET_EPOCH_EXIT(et);
        BPFD_UNLOCK(d);
 
-       error = bpf_movein(uio, (int)bp->bif_dlt, ifp,
-           &m, &dst, &hlen, d);
-
-       if (error != 0) {
-               counter_u64_add(d->bd_wdcount, 1);
-               bpfd_rele(d);
-               return (error);
+       len = uio->uio_resid;
+       /* Allocate a mbuf, up to MJUM16BYTES bytes, for our write. */
+       m = m_get3(len, M_WAITOK, MT_DATA, M_PKTHDR);
+       if (m == NULL) {
+               error = ENOMEM;
+               goto fail_wref;
        }
+       m->m_pkthdr.len = m->m_len = len;
 
-       BPFD_LOCK(d);
-       /*
-        * Check that descriptor is still attached to the interface.
-        * This can happen on bpfdetach(). To avoid access to detached
-        * ifnet, free mbuf and return ENXIO.
-        */
-       if (d->bd_bif == NULL) {
-               counter_u64_add(d->bd_wdcount, 1);
-               BPFD_UNLOCK(d);
-               bpfd_rele(d);
-               m_freem(m);
-               return (ENXIO);
+       error = uiomove(mtod(m, u_char *), len, uio);
+       if (error)
+               goto fail_wref;
+
+       if (bpf_filter(d->bd_wfilter, mtod(m, u_char *), len, len) == 0) {
+               error = EPERM;
+               goto fail_wref;
        }
-       counter_u64_add(d->bd_wfcount, 1);
-       if (d->bd_flags & BPFD_HDRCMPLT)
-               dst.sa_family = pseudo_AF_HDRCMPLT;
 
        if (d->bd_flags & BPFD_FEEDBACK) {
-               mc = m_dup(m, M_NOWAIT);
-               if (mc != NULL)
-                       mc->m_pkthdr.rcvif = ifp;
+               mc = m_dup(m, M_WAITOK);
                /* Set M_PROMISC for outgoing packets to be discarded. */
                if (d->bd_direction == BPF_D_INOUT)
                        m->m_flags |= M_PROMISC;
        } else
                mc = NULL;
 
-       m->m_pkthdr.len -= hlen;
-       m->m_len -= hlen;
-       m->m_data += hlen;      /* XXX */
+       /* XXXGL: should belong to bpf_ifnet.c */
+       if (d->bd_pcp != 0)
+               (void)vlan_set_pcp(m, d->bd_pcp);
 
-       CURVNET_SET(ifp->if_vnet);
+       BPFD_LOCK(d);
 #ifdef MAC
        mac_bpfdesc_create_mbuf(d, m);
        if (mc != NULL)
                mac_bpfdesc_create_mbuf(d, mc);
 #endif
-
-       bzero(&ro, sizeof(ro));
-       if (hlen != 0) {
-               ro.ro_prepend = (u_char *)&dst.sa_data;
-               ro.ro_plen = hlen;
-               ro.ro_flags = RT_HAS_HEADER;
+       /*
+        * Check that descriptor is still attached to the interface.
+        * This can happen on bpfdetach() or if other thread did BIOCSDLT.
+        */
+       if (__predict_false(d->bd_bif != bp)) {
+               BPFD_UNLOCK(d);
+               m_freem(mc);
+               error = ENXIO;
+               goto fail_wref;
        }
+       BPFD_UNLOCK(d);
 
-       if (d->bd_pcp != 0)
-               vlan_set_pcp(m, d->bd_pcp);
-
-       /* Avoid possible recursion on BPFD_LOCK(). */
        NET_EPOCH_ENTER(et);
-       BPFD_UNLOCK(d);
-       error = (*ifp->if_output)(ifp, m, &dst, &ro);
+       error = bp->bif_methods->bif_write(bp->bif_softc, m, mc, d->bd_flags);
+       NET_EPOCH_EXIT(et);
        if (error)
                counter_u64_add(d->bd_wdcount, 1);
-
-       if (mc != NULL) {
-               if (error == 0)
-                       (*ifp->if_input)(ifp, mc);
-               else
-                       m_freem(mc);
-       }
-       NET_EPOCH_EXIT(et);
-       CURVNET_RESTORE();
+       else
+               counter_u64_add(d->bd_wfcount, 1);
        bpfd_rele(d);
+
        return (error);
 
-out_locked:
+fail_wref:
        counter_u64_add(d->bd_wdcount, 1);
-       NET_EPOCH_EXIT(et);
-       BPFD_UNLOCK(d);
+       bpfd_rele(d);
+       m_freem(m);
        return (error);
 }
 
@@ -1506,8 +1281,9 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int 
flags,
                         */
                        error = EINVAL;
                } else if (d->bd_promisc == 0) {
-                       error = ifpromisc(d->bd_bif->bif_ifp, 1);
-                       if (error == 0)
+                       struct bpf_if *bp = d->bd_bif;
+
+                       if ((error = bif_promisc(bp, true)) == 0)
                                d->bd_promisc = 1;
                }
                BPF_UNLOCK();
@@ -1579,10 +1355,10 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, 
int flags,
                if (d->bd_bif == NULL)
                        error = EINVAL;
                else {
-                       struct ifnet *const ifp = d->bd_bif->bif_ifp;
+                       struct bpf_if *const bp = d->bd_bif;
                        struct ifreq *const ifr = (struct ifreq *)addr;
 
-                       strlcpy(ifr->ifr_name, ifp->if_xname,
+                       strlcpy(ifr->ifr_name, bp->bif_name,
                            sizeof(ifr->ifr_name));
                }
                BPF_UNLOCK();
@@ -1591,7 +1367,10 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int 
flags,
        /*
         * Set interface.
         */
-       case BIOCSETIF:
+       case BIOCSETIF: {
+               struct ifreq *const ifr = (struct ifreq *)addr;
+               struct bpf_if *bp;
+
                /*
                 * Behavior here depends on the buffering model.  If we're
                 * using kernel memory buffers, then we can allocate them here.
@@ -1611,10 +1390,21 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, 
int flags,
                } else
                        BPFD_UNLOCK(d);
                BPF_LOCK();
-               error = bpf_setif(d, (struct ifreq *)addr);
+               /*
+                * Look through attached interfaces for the named one.
+                */
+               LIST_FOREACH(bp, &bpf_iflist, bif_next) {
+                       if (strncmp(ifr->ifr_name, bp->bif_name,
+                           sizeof(ifr->ifr_name)) == 0)
+                               break;
+               }
+               if (bp != NULL)
+                       error = bpf_attachd(d, bp);
+               else
+                       error = ENXIO;
                BPF_UNLOCK();
                break;
-
+       }
        /*
         * Set read timeout.
         */
@@ -2051,41 +1841,24 @@ bpf_setf(struct bpf_d *d, struct bpf_program *fp, 
u_long cmd)
                NET_EPOCH_CALL(bpf_program_buffer_free, &fcode->epoch_ctx);
 
        if (track_event)
-               EVENTHANDLER_INVOKE(bpf_track,
-                   d->bd_bif->bif_ifp, d->bd_bif->bif_dlt, 1);
+               bif_attachd(d->bd_bif);
 
        BPF_UNLOCK();
        return (0);
 }
 
 /*
- * Detach a file from its current interface (if attached at all) and attach
- * to the interface indicated by the name stored in ifr.
- * Return an errno or 0.
+ * Attach descriptor to a tap point, possibly detaching from the old one,
+ * reset the counters.
+ * XXXGL: this KPI is subject to change
  */
 static int
-bpf_setif(struct bpf_d *d, struct ifreq *ifr)
+bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
 {
-       struct bpf_if *bp;
-       struct ifnet *theywant;
+       bool writer;
 
        BPF_LOCK_ASSERT();
 
-       theywant = ifunit(ifr->ifr_name);
-       if (theywant == NULL)
-               return (ENXIO);
-       /*
-        * Look through attached interfaces for the named one.
-        */
-       LIST_FOREACH(bp, &bpf_iflist, bif_next) {
-               if (bp->bif_ifp == theywant &&
-                   bp->bif_bpf == &theywant->if_bpf)
-                       break;
-       }
-       if (bp == NULL)
-               return (ENXIO);
-
-       MPASS(bp == theywant->if_bpf);
        /*
         * At this point, we expect the buffer is already allocated.  If not,
         * return an error.
@@ -2098,15 +1871,61 @@ bpf_setif(struct bpf_d *d, struct ifreq *ifr)
                break;
 
        default:
-               panic("bpf_setif: bufmode %d", d->bd_bufmode);
+               panic("%s: bufmode %d", __func__, d->bd_bufmode);
        }
-       if (bp != d->bd_bif)
-               bpf_attachd(d, bp);
-       else {
+
+       if (bp == d->bd_bif) {
                BPFD_LOCK(d);
                reset_d(d);
                BPFD_UNLOCK(d);
-       }
+               return (0);
+       } else if (d->bd_bif != NULL)
+               bpf_detachd(d, false);
+
+       /*
+        * Save sysctl value to protect from sysctl change between reads.
+        */
+       writer = V_bpf_optimize_writers || (d->bd_writer > 0);
+
+       /*
+        * Point d at bp, and add d to the interface's list.
+        * Since there are many applications using BPF for
+        * sending raw packets only (dhcpd, cdpd are good examples)
+        * we can delay adding d to the list of active listeners until
+        * some filter is configured.
+        */
+       BPFD_LOCK(d);
+       /*
+        * Hold reference to bpif while descriptor uses this interface.
+        */
+       bpfif_ref(bp);
+       d->bd_bif = bp;
+       if (writer) {
+               /* Add to writers-only list */
+               CK_LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next);
+               /*
+                * We decrement bd_writer on every filter set operation.
+                * First BIOCSETF is done by pcap_open_live() to set up
+                * snap length. After that application usually sets its own
+                * filter.
+                */
+               d->bd_writer = 2;
+       } else
+               CK_LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
+
+       reset_d(d);
+
+       /* Trigger EVFILT_WRITE events. */
+       bpf_wakeup(d);
+
+       BPFD_UNLOCK(d);
+
+       CTR3(KTR_NET, "%s: called by pid %d, adding to %s list",
+           __func__, d->bd_pid, d->bd_writer ? "writer" : "active");
+
+       if (!writer)
+               bif_attachd(bp);
+
        return (0);
 }
 
@@ -2229,7 +2048,7 @@ filt_bpfwrite(struct knote *kn, long hint)
                kn->kn_data = 0;
                return (0);
        } else {
-               kn->kn_data = d->bd_bif->bif_ifp->if_mtu;
+               kn->kn_data = bif_wrsize(d->bd_bif);
                return (1);
        }
 }
@@ -2305,7 +2124,7 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
        CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
                counter_u64_add(d->bd_rcount, 1);
                /*
-                * NB: We dont call BPF_CHECK_DIRECTION() here since there
+                * NB: We don't check the direction here since there
                 * is no way for the caller to indiciate to us whether this
                 * packet is inbound or outbound. In the bpf_mtap() routines,
                 * we use the interface pointers on the mbuf to figure it out.
@@ -2327,7 +2146,7 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
                                gottime = bpf_gettime(&bt, d->bd_tstamp,
                                    NULL);
 #ifdef MAC
-                       if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
+                       if (bif_mac_check_receive(bp, d) == 0)
 #endif
                                catchpacket(d, pkt, pktlen, slen,
                                    bpf_append_bytes, &bt);
@@ -2344,10 +2163,6 @@ bpf_tap_if(if_t ifp, u_char *pkt, u_int pktlen)
                bpf_tap(ifp->if_bpf, pkt, pktlen);
 }
 
-#define        BPF_CHECK_DIRECTION(d, r, i)                            \
-           (((d)->bd_direction == BPF_D_IN && (r) != (i)) ||   \
-           ((d)->bd_direction == BPF_D_OUT && (r) == (i)))
-
 /*
  * Incoming linkage from device drivers, when packet is in an mbuf chain.
  * Locking model is explained in bpf_tap().
@@ -2375,7 +2190,7 @@ bpf_mtap(struct bpf_if *bp, struct mbuf *m)
 
        NET_EPOCH_ENTER(et);
        CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
-               if (BPF_CHECK_DIRECTION(d, m_rcvif(m), bp->bif_ifp))
+               if (bpf_chkdir(d, m))
                        continue;
                counter_u64_add(d->bd_rcount, 1);
 #ifdef BPF_JITTER
@@ -2394,7 +2209,7 @@ bpf_mtap(struct bpf_if *bp, struct mbuf *m)
                        if (gottime < bpf_ts_quality(d->bd_tstamp))
                                gottime = bpf_gettime(&bt, d->bd_tstamp, m);
 #ifdef MAC
-                       if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
+                       if (bif_mac_check_receive(bp, d) == 0)
 #endif
                                catchpacket(d, (u_char *)m, pktlen, slen,
                                    bpf_append_mbuf, &bt);
@@ -2449,7 +2264,7 @@ bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, 
struct mbuf *m)
 
        NET_EPOCH_ENTER(et);
        CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
-               if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
+               if (bpf_chkdir(d, m))
                        continue;
                counter_u64_add(d->bd_rcount, 1);
                slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
@@ -2460,7 +2275,7 @@ bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, 
struct mbuf *m)
                        if (gottime < bpf_ts_quality(d->bd_tstamp))
                                gottime = bpf_gettime(&bt, d->bd_tstamp, m);
 #ifdef MAC
-                       if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
+                       if (bif_mac_check_receive(bp, d) == 0)
 #endif
                                catchpacket(d, (u_char *)&mb, pktlen, slen,
                                    bpf_append_mbuf, &bt);
@@ -2479,7 +2294,6 @@ bpf_mtap2_if(if_t ifp, void *data, u_int dlen, struct 
mbuf *m)
        }
 }
 
-#undef BPF_CHECK_DIRECTION
 #undef BPF_TSTAMP_NONE
 #undef BPF_TSTAMP_FAST
 #undef BPF_TSTAMP_NORMAL
@@ -2759,57 +2573,36 @@ bpfd_free(epoch_context_t ctx)
 }
 
 /*
- * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
- * fixed size of the link header (variable length headers not yet supported).
+ * Attach a tap point to bpf.
+ * XXX: with current KPI it is consumer's responsibility to avoid duplicates.
  */
-void
-bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
-{
-
-       bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
-}
-
-/*
- * Attach an interface to bpf.  ifp is a pointer to the structure
- * defining the interface to be attached, dlt is the link layer type,
- * and hdrlen is the fixed size of the link header (variable length
- * headers are not yet supporrted).
- */
-void
-bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen,
-    struct bpf_if **driverp)
+struct bpf_if *
+bpf_attach(const char *name, u_int dlt, u_int hdrlen,
+    const struct bif_methods *methods, void *sc)
 {
        struct bpf_if *bp;
 
-       KASSERT(*driverp == NULL,
-           ("bpfattach2: driverp already initialized"));
-
        bp = malloc(sizeof(*bp), M_BPF, M_WAITOK | M_ZERO);
 
        CK_LIST_INIT(&bp->bif_dlist);
        CK_LIST_INIT(&bp->bif_wlist);
-       bp->bif_ifp = ifp;
        bp->bif_dlt = dlt;
        bp->bif_hdrlen = hdrlen;
-       bp->bif_bpf = driverp;
+       bp->bif_softc = sc;
+       bp->bif_name = name;
+       bp->bif_methods = methods;
        refcount_init(&bp->bif_refcnt, 1);
-       *driverp = bp;
-       /*
-        * Reference ifnet pointer, so it won't freed until
-        * we release it.
-        */
-       if_ref(ifp);
        BPF_LOCK();
        LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
        BPF_UNLOCK();
 
-       if (bootverbose && IS_DEFAULT_VNET(curvnet))
-               if_printf(ifp, "bpf attached\n");
+       return (bp);
 }
 
 #ifdef VIMAGE
 /*
  * Detach descriptors on interface's vmove event.
+ * XXXGL: shouldn't be a special case, but a full detach.
  */
 void
 bpf_ifdetach(struct ifnet *ifp)
@@ -2819,7 +2612,8 @@ bpf_ifdetach(struct ifnet *ifp)
 
        BPF_LOCK();
        LIST_FOREACH(bp, &bpf_iflist, bif_next) {
-               if (bp->bif_ifp != ifp)
*** 706 LINES SKIPPED ***

Reply via email to