On Sun, Feb 07, 2021 at 06:55:37PM +0100, Sebastian Benoit wrote:
> David Gwynne(da...@gwynne.id.au) on 2021.01.27 17:13:09 +1000:
> > some of the discussion around dup-to made me think that a diff we
> > have here at work might be more broadly useful.
> > 
> > we run a box here with a bunch of ethernet ports plugged into span
> > ports on switches. basically every packet going to our firewalls gets
> > duplicated to this host. we then have code that generates flow data from
> > these ports. it's also nice to have one place to ssh to so you can
> > tcpdump things. anyway, that flow collector watches packets on those
> > interfaces via bpf, but apart from that we don't actually want to
> > do anything with the packets those interfaces receive. we especially
> > do not want them entering the stack. we ssh to this box over the
> > firewall, so if the span port copies those packets to the box and
> > the stack tries to process them, things don't work great.
> > 
> > we could enable the fildrop stuff with bpf, but there's an annoying gap
> > between when the interfaces come up and when the flow collector starts
> > running. also, if the flow collector crashes or we restart it cos we're
> > hacking on the code, this provides more gaps for packets to enter the
> > stack.
> > 
> > we prevented this by adding a "monitor" interface flag. it makes the
> > interface input code drop all the packets rather than queuing them for
> > the stack to process.
> > 
> > is there any interest in having this in the tree?
> > 
> > if so, i need to do some work to make sure all interfaces push
> > packets into the stack with if_input, ifiq_input, or if_vinput. a
> > bunch of them like gif and gre currently call protocol input routines
> > directly, so they skip this check.
> > 
> > so, thoughts?
> 
> I'd like this.
> 
> Previously when i needed something similar, i put the interface into its own
> routing domain. But of course that doesn't avoid the packets entering the
> stack, just some consequences.
> 
> I also think 'monitor' is the right keyword for ifconfig.
> 
> ok benno, but manpage is missing

this is also missing. this lets l3 interfaces use the if_vinput
machinery by providing a p2p_input handler. for if_vinput to support
p2p interfaces, they have to be able to say what kind of bpf_mtap
handling they need rather than have the machinery assume everything
is an ethernet packet. this also lets us factor out the l3 input
handling from a lot of these drivers.

in turn, this makes it possible to use monitor on gif, gre, mgre,
mpe, and mpip. looks like it would already work on tun, but i'm not
sure what the point of that is.

ok?

Index: net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.625
diff -u -p -r1.625 if.c
--- net/if.c    18 Jan 2021 09:55:43 -0000      1.625
+++ net/if.c    14 Feb 2021 12:12:21 -0000
@@ -847,13 +847,17 @@ if_vinput(struct ifnet *ifp, struct mbuf
        m->m_pkthdr.ph_ifidx = ifp->if_index;
        m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
 
+#if NPF > 0
+       pf_pkt_addr_changed(m);
+#endif
+
        counters_pkt(ifp->if_counters,
            ifc_ipackets, ifc_ibytes, m->m_pkthdr.len);
 
 #if NBPFILTER > 0
        if_bpf = ifp->if_bpf;
        if (if_bpf) {
-               if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN)) {
+               if (ifp->if_bpf_mtap(if_bpf, m, BPF_DIRECTION_IN)) {
                        m_freem(m);
                        return;
                }
@@ -1497,6 +1501,42 @@ p2p_rtrequest(struct ifnet *ifp, int req
        }
 }
 
+int
+p2p_bpf_mtap(caddr_t if_bpf, const struct mbuf *m, u_int dir)
+{
+#if NBPFILTER > 0
+       return (bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family, m, dir));
+#else
+       return (0);
+#endif
+}
+
+void
+p2p_input(struct ifnet *ifp, struct mbuf *m)
+{
+       void (*input)(struct ifnet *, struct mbuf *);
+
+       switch (m->m_pkthdr.ph_family) {
+       case AF_INET:
+               input = ipv4_input;
+               break;
+#ifdef INET6
+       case AF_INET6:
+               input = ipv6_input;
+               break;
+#endif
+#ifdef MPLS
+       case AF_MPLS:
+               input = mpls_input;
+               break;
+#endif
+       default:
+               m_freem(m);
+               return;
+       }
+
+       (*input)(ifp, m);
+}
 
 /*
  * Bring down all interfaces
Index: net/if_ethersubr.c
===================================================================
RCS file: /cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.268
diff -u -p -r1.268 if_ethersubr.c
--- net/if_ethersubr.c  4 Jan 2021 21:21:41 -0000       1.268
+++ net/if_ethersubr.c  14 Feb 2021 12:12:21 -0000
@@ -680,7 +680,9 @@ ether_ifattach(struct ifnet *ifp)
        if_alloc_sadl(ifp);
        memcpy(LLADDR(ifp->if_sadl), ac->ac_enaddr, ifp->if_addrlen);
        LIST_INIT(&ac->ac_multiaddrs);
+
 #if NBPFILTER > 0
+       ifp->if_bpf_mtap = bpf_mtap_ether;
        bpfattach(&ifp->if_bpf, ifp, DLT_EN10MB, ETHER_HDR_LEN);
 #endif
 }
Index: net/if_gif.c
===================================================================
RCS file: /cvs/src/sys/net/if_gif.c,v
retrieving revision 1.131
diff -u -p -r1.131 if_gif.c
--- net/if_gif.c        21 Aug 2020 22:59:27 -0000      1.131
+++ net/if_gif.c        14 Feb 2021 12:12:21 -0000
@@ -166,6 +166,8 @@ gif_clone_create(struct if_clone *ifc, i
        ifp->if_flags  = IFF_POINTOPOINT | IFF_MULTICAST;
        ifp->if_xflags = IFXF_CLONED;
        ifp->if_ioctl  = gif_ioctl;
+       ifp->if_bpf_mtap = p2p_bpf_mtap;
+       ifp->if_input  = p2p_input;
        ifp->if_start  = gif_start;
        ifp->if_output = gif_output;
        ifp->if_rtrequest = p2p_rtrequest;
@@ -174,6 +176,7 @@ gif_clone_create(struct if_clone *ifc, i
 
        if_attach(ifp);
        if_alloc_sadl(ifp);
+       if_counters_alloc(ifp);
 
 #if NBPFILTER > 0
        bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
@@ -787,7 +790,6 @@ gif_input(struct gif_tunnel *key, struct
        struct mbuf *m = *mp;
        struct gif_softc *sc;
        struct ifnet *ifp;
-       void (*input)(struct ifnet *, struct mbuf *);
        uint8_t itos;
        int rxhprio;
 
@@ -830,7 +832,6 @@ gif_input(struct gif_tunnel *key, struct
                        ip_tos_patch(ip, itos);
 
                m->m_pkthdr.ph_family = AF_INET;
-               input = ipv4_input;
                break;
        }
 #ifdef INET6
@@ -851,7 +852,6 @@ gif_input(struct gif_tunnel *key, struct
                SET(ip6->ip6_flow, htonl(itos << 20));
 
                m->m_pkthdr.ph_family = AF_INET6;
-               input = ipv6_input;
                break;
        }
 #endif /* INET6 */
@@ -866,7 +866,6 @@ gif_input(struct gif_tunnel *key, struct
                itos = (ntohl(shim) >> MPLS_EXP_OFFSET) << 5;
        
                m->m_pkthdr.ph_family = AF_MPLS;
-               input = mpls_input;
                break;
        }
 #endif /* MPLS */
@@ -875,8 +874,6 @@ gif_input(struct gif_tunnel *key, struct
        }
 
        m->m_flags &= ~(M_MCAST|M_BCAST);
-       m->m_pkthdr.ph_ifidx = ifp->if_index;
-       m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
 
        switch (rxhprio) {
        case IF_HDRPRIO_PACKET:
@@ -893,25 +890,8 @@ gif_input(struct gif_tunnel *key, struct
                break;
        }
 
-#if NPF > 0
-       pf_pkt_addr_changed(m);
-#endif
-
-       ifp->if_ipackets++;
-       ifp->if_ibytes += m->m_pkthdr.len;
-
-#if NBPFILTER > 0
-       {
-               caddr_t if_bpf = ifp->if_bpf;
-               if (if_bpf) {
-                       bpf_mtap_af(ifp->if_bpf, m->m_pkthdr.ph_family,
-                           m, BPF_DIRECTION_IN);
-               }
-       }
-#endif
-
        *mp = NULL;
-       (*input)(ifp, m);
+       if_vinput(ifp, m);
        return (IPPROTO_DONE);
 
  drop:
Index: net/if_gre.c
===================================================================
RCS file: /cvs/src/sys/net/if_gre.c,v
retrieving revision 1.164
diff -u -p -r1.164 if_gre.c
--- net/if_gre.c        19 Jan 2021 07:31:47 -0000      1.164
+++ net/if_gre.c        14 Feb 2021 12:12:21 -0000
@@ -590,6 +590,8 @@ gre_clone_create(struct if_clone *ifc, i
        ifp->if_mtu = GREMTU;
        ifp->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
        ifp->if_xflags = IFXF_CLONED;
+       ifp->if_bpf_mtap = p2p_bpf_mtap;
+       ifp->if_input = p2p_input;
        ifp->if_output = gre_output;
        ifp->if_start = gre_start;
        ifp->if_ioctl = gre_ioctl;
@@ -659,6 +661,8 @@ mgre_clone_create(struct if_clone *ifc, 
        ifp->if_mtu = GREMTU;
        ifp->if_flags = IFF_MULTICAST|IFF_SIMPLEX;
        ifp->if_xflags = IFXF_CLONED;
+       ifp->if_bpf_mtap = p2p_bpf_mtap;
+       ifp->if_input = p2p_input;
        ifp->if_rtrequest = mgre_rtrequest;
        ifp->if_output = mgre_output;
        ifp->if_start = mgre_start;
@@ -1006,12 +1010,8 @@ gre_input_key(struct mbuf **mp, int *off
        caddr_t buf;
        struct gre_header *gh;
        struct gre_h_key *gkh;
-       void (*input)(struct ifnet *, struct mbuf *);
        struct mbuf *(*patch)(const struct gre_tunnel *, struct mbuf *,
            uint8_t *, uint8_t);
-#if NBPFILTER > 0
-       int bpf_af = AF_UNSPEC; /* bpf */
-#endif
        int mcast = 0;
        uint8_t itos;
 
@@ -1114,19 +1114,13 @@ gre_input_key(struct mbuf **mp, int *off
                /* FALLTHROUGH */
        }
        case htons(ETHERTYPE_IP):
-#if NBPFILTER > 0
-               bpf_af = AF_INET;
-#endif
+               m->m_pkthdr.ph_family = AF_INET;
                patch = gre_ipv4_patch;
-               input = ipv4_input;
                break;
 #ifdef INET6
        case htons(ETHERTYPE_IPV6):
-#if NBPFILTER > 0
-               bpf_af = AF_INET6;
-#endif
+               m->m_pkthdr.ph_family = AF_INET6;
                patch = gre_ipv6_patch;
-               input = ipv6_input;
                break;
 #endif
 #ifdef MPLS
@@ -1134,11 +1128,8 @@ gre_input_key(struct mbuf **mp, int *off
                mcast = M_MCAST|M_BCAST;
                /* fallthrough */
        case htons(ETHERTYPE_MPLS):
-#if NBPFILTER > 0
-               bpf_af = AF_MPLS;
-#endif
+               m->m_pkthdr.ph_family = AF_MPLS;
                patch = gre_mpls_patch;
-               input = mpls_input;
                break;
 #endif
        case htons(0):
@@ -1189,22 +1180,8 @@ gre_input_key(struct mbuf **mp, int *off
 
        m->m_flags &= ~(M_MCAST|M_BCAST);
        m->m_flags |= mcast;
-       m->m_pkthdr.ph_ifidx = ifp->if_index;
-       m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
-
-#if NPF > 0
-       pf_pkt_addr_changed(m);
-#endif
-
-       counters_pkt(ifp->if_counters,
-           ifc_ipackets, ifc_ibytes, m->m_pkthdr.len);
-
-#if NBPFILTER > 0
-       if (ifp->if_bpf)
-               bpf_mtap_af(ifp->if_bpf, bpf_af, m, BPF_DIRECTION_IN);
-#endif
 
-       (*input)(ifp, m);
+       if_vinput(ifp, m);
        return (IPPROTO_DONE);
 decline:
        *mp = m;
@@ -1333,10 +1310,6 @@ egre_input(const struct gre_tunnel *key,
 
        m->m_flags &= ~(M_MCAST|M_BCAST);
 
-#if NPF > 0
-       pf_pkt_addr_changed(m);
-#endif
-
        gre_l2_prio(&sc->sc_tunnel, m, otos);
 
        if_vinput(&sc->sc_ac.ac_if, m);
@@ -1581,13 +1554,9 @@ nvgre_input(const struct gre_tunnel *key
        SET(m->m_pkthdr.csum_flags, M_FLOWID);
        m->m_pkthdr.ph_flowid = bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY;
 
-       gre_l2_prio(&sc->sc_tunnel, m, otos);
-
        m->m_flags &= ~(M_MCAST|M_BCAST);
 
-#if NPF > 0
-       pf_pkt_addr_changed(m);
-#endif
+       gre_l2_prio(&sc->sc_tunnel, m, otos);
 
        if_vinput(&sc->sc_ac.ac_if, m);
 
@@ -4213,13 +4182,9 @@ eoip_input(struct gre_tunnel *key, struc
        if (m->m_pkthdr.len != len)
                m_adj(m, len - m->m_pkthdr.len);
 
-       gre_l2_prio(&sc->sc_tunnel, m, otos);
-
        m->m_flags &= ~(M_MCAST|M_BCAST);
 
-#if NPF > 0
-       pf_pkt_addr_changed(m);
-#endif
+       gre_l2_prio(&sc->sc_tunnel, m, otos);
 
        if_vinput(&sc->sc_ac.ac_if, m);
 
Index: net/if_mpe.c
===================================================================
RCS file: /cvs/src/sys/net/if_mpe.c,v
retrieving revision 1.97
diff -u -p -r1.97 if_mpe.c
--- net/if_mpe.c        21 Aug 2020 22:59:27 -0000      1.97
+++ net/if_mpe.c        14 Feb 2021 12:12:21 -0000
@@ -108,6 +108,8 @@ mpe_clone_create(struct if_clone *ifc, i
        ifp->if_softc = sc;
        ifp->if_mtu = MPE_MTU;
        ifp->if_ioctl = mpe_ioctl;
+       ifp->if_bpf_mtap = p2p_bpf_mtap;
+       ifp->if_input = p2p_input;
        ifp->if_output = mpe_output;
        ifp->if_start = mpe_start;
        ifp->if_type = IFT_MPLS;
@@ -117,6 +119,8 @@ mpe_clone_create(struct if_clone *ifc, i
 
        if_attach(ifp);
        if_alloc_sadl(ifp);
+       if_counters_alloc(ifp);
+
 #if NBPFILTER > 0
        bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
 #endif
@@ -454,7 +458,6 @@ mpe_input(struct ifnet *ifp, struct mbuf
        struct mbuf     *n;
        uint8_t          ttl, tos;
        uint32_t         exp;
-       void (*input)(struct ifnet *, struct mbuf *);
        int rxprio = sc->sc_rxhprio;
 
        shim = mtod(m, struct shim_hdr *);
@@ -488,7 +491,7 @@ mpe_input(struct ifnet *ifp, struct mbuf
                        if (m == NULL)
                                return;
                }
-               input = ipv4_input;
+
                m->m_pkthdr.ph_family = AF_INET;
                break;
        }
@@ -510,7 +513,7 @@ mpe_input(struct ifnet *ifp, struct mbuf
                        if (m == NULL)
                                return;
                }
-               input = ipv6_input;
+
                m->m_pkthdr.ph_family = AF_INET6;
                break;
        }
@@ -534,21 +537,7 @@ mpe_input(struct ifnet *ifp, struct mbuf
                break;
        }
 
-       /* new receive if and move into correct rtable */
-       m->m_pkthdr.ph_ifidx = ifp->if_index;
-       m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
-
-       /* packet has not been processed by PF yet. */
-       KASSERT(m->m_pkthdr.pf.statekey == NULL);
-
-#if NBPFILTER > 0
-       if (ifp->if_bpf) {
-               bpf_mtap_af(ifp->if_bpf, m->m_pkthdr.ph_family,
-                   m, BPF_DIRECTION_IN);
-       }
-#endif
-
-       (*input)(ifp, m);
+       if_vinput(ifp, m);
        return;
 drop:
        m_freem(m);
Index: net/if_mpip.c
===================================================================
RCS file: /cvs/src/sys/net/if_mpip.c,v
retrieving revision 1.12
diff -u -p -r1.12 if_mpip.c
--- net/if_mpip.c       21 Aug 2020 22:59:27 -0000      1.12
+++ net/if_mpip.c       14 Feb 2021 12:12:21 -0000
@@ -112,6 +112,8 @@ mpip_clone_create(struct if_clone *ifc, 
        ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
        ifp->if_xflags = IFXF_CLONED;
        ifp->if_ioctl = mpip_ioctl;
+       ifp->if_bpf_mtap = p2p_bpf_mtap;
+       ifp->if_input = p2p_input;
        ifp->if_output = mpip_output;
        ifp->if_start = mpip_start;
        ifp->if_rtrequest = p2p_rtrequest;
@@ -460,7 +462,6 @@ mpip_input(struct mpip_softc *sc, struct
        uint32_t shim, exp;
        struct mbuf *n;
        uint8_t ttl, tos;
-       void (*input)(struct ifnet *, struct mbuf *);
 
        if (!ISSET(ifp->if_flags, IFF_RUNNING))
                goto drop;
@@ -552,7 +553,7 @@ mpip_input(struct mpip_softc *sc, struct
                        if (m == NULL)
                                return;
                }
-               input = ipv4_input;
+
                m->m_pkthdr.ph_family = AF_INET;
                break;
        }
@@ -574,7 +575,7 @@ mpip_input(struct mpip_softc *sc, struct
                        if (m == NULL)
                                return;
                }
-               input = ipv6_input;
+
                m->m_pkthdr.ph_family = AF_INET6;
                break;
        }
@@ -599,23 +600,7 @@ mpip_input(struct mpip_softc *sc, struct
                break;
        }
 
-       m->m_pkthdr.ph_ifidx = ifp->if_index;
-       m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
-
-       /* packet has not been processed by PF yet. */
-       KASSERT(m->m_pkthdr.pf.statekey == NULL);
-
-#if NBPFILTER > 0
-       {
-               caddr_t if_bpf = ifp->if_bpf;
-               if (if_bpf) {
-                       bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family,
-                           m, BPF_DIRECTION_IN);
-               }
-       }
-#endif
-
-       (*input)(ifp, m);
+       if_vinput(ifp, m);
        return;
 drop:
        m_freem(m);
Index: net/if_tun.c
===================================================================
RCS file: /cvs/src/sys/net/if_tun.c,v
retrieving revision 1.229
diff -u -p -r1.229 if_tun.c
--- net/if_tun.c        19 Jan 2021 19:39:58 -0000      1.229
+++ net/if_tun.c        14 Feb 2021 12:12:21 -0000
@@ -239,6 +239,7 @@ tun_create(struct if_clone *ifc, int uni
        if_counters_alloc(ifp);
 
        if ((flags & TUN_LAYER2) == 0) {
+               ifp->if_bpf_mtap = bpf_mtap;
                ifp->if_input = tun_input;
                ifp->if_output = tun_output;
                ifp->if_mtu = ETHERMTU;
Index: net/if_var.h
===================================================================
RCS file: /cvs/src/sys/net/if_var.h,v
retrieving revision 1.112
diff -u -p -r1.112 if_var.h
--- net/if_var.h        29 Jul 2020 12:09:31 -0000      1.112
+++ net/if_var.h        14 Feb 2021 12:12:21 -0000
@@ -161,6 +161,7 @@ struct ifnet {                              /* and the entries */
 
        /* procedure handles */
        void    (*if_input)(struct ifnet *, struct mbuf *);
+       int     (*if_bpf_mtap)(caddr_t, const struct mbuf *, u_int);
        int     (*if_output)(struct ifnet *, struct mbuf *, struct sockaddr *,
                     struct rtentry *); /* output routine (enqueue) */
                                        /* link level output function */
@@ -326,6 +327,8 @@ int if_input_local(struct ifnet *, struc
 int    if_output_local(struct ifnet *, struct mbuf *, sa_family_t);
 void   if_rtrequest_dummy(struct ifnet *, int, struct rtentry *);
 void   p2p_rtrequest(struct ifnet *, int, struct rtentry *);
+void   p2p_input(struct ifnet *, struct mbuf *);
+int    p2p_bpf_mtap(caddr_t, const struct mbuf *, u_int);
 
 struct ifaddr *ifa_ifwithaddr(struct sockaddr *, u_int);
 struct ifaddr *ifa_ifwithdstaddr(struct sockaddr *, u_int);
Index: net/ifq.c
===================================================================
RCS file: /cvs/src/sys/net/ifq.c,v
retrieving revision 1.41
diff -u -p -r1.41 ifq.c
--- net/ifq.c   7 Jul 2020 00:00:03 -0000       1.41
+++ net/ifq.c   14 Feb 2021 12:12:21 -0000
@@ -693,7 +693,7 @@ ifiq_input(struct ifiqueue *ifiq, struct
                ml_init(ml);
 
                while ((m = ml_dequeue(&ml0)) != NULL) {
-                       if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN))
+                       if (ifp->if_bpf_mtap(if_bpf, m, BPF_DIRECTION_IN))
                                m_freem(m);
                        else
                                ml_enqueue(ml, m);

Reply via email to