On Sun, Feb 07, 2021 at 06:55:37PM +0100, Sebastian Benoit wrote: > David Gwynne(da...@gwynne.id.au) on 2021.01.27 17:13:09 +1000: > > some of the discussion around dup-to made me think that a diff we > > have here at work might be more broadly useful. > > > > we run a box here with a bunch of ethernet ports plugged into span > > ports on switches. basically every packet going to our firewalls gets > > duplicated to this host. we then have code that generates flow data from > > these ports. it's also nice to have one place to ssh to and so you can > > tcpdump things. anyway, that flow collector watches packets on those > > interfaces via bpf, but apart from that we don't actually want to > > do anything with the packets those interfaces receive. we especially > > do not want them entering the stack. we ssh to this box over the > > firewall, so if the span port copies those packets to the box and > > the stack tries to process them, things don't work great. > > > > we could enable the fildrop stuff with bpf, but there's an annoying gap > > between when the interfaces come up and when the flow collector starts > > running. also, if the flow collector crashes or we restart it cos we're > > hacking on the code, this provides more gaps for packets to enter the > > stack. > > > > we prevented this by adding a "monitor" interface flag. it makes the > > interface input code drop all the packets rather than queuing them for > > the stack to process. > > > > is there any interest in having this in the tree? > > > > if so, i need to do some work to make sure all interfaces push > > packets into the stack with if_input, ifiq_input, or if_vinput. a > > bunch of them like gif and gre currently call protocol input routines > > directly, so they skip this check. > > > > so, thoughts? > > I'd like this. > > Previously when i needed something similar, i put the interface into its own > routing domain. 
But of course that doesn't avoid the packets entering the > stack, just some consequences. > > I also think 'monitor' is the right keyword for ifconfig. > > ok benno, but manpage is missing
this is also missing. this lets l3 interfaces use the if_vinput machinery by providing a p2p_input handler. for if_vinput to support p2p interfaces, they have to be able to say what kind of bpf_mtap handling they need rather than have the machinery assume everything is an ethernet packet. this also lets us factor out the l3 input handling from a lot of these drivers. in turn, this makes it possible to use monitor on gif, gre, mgre, mpe, and mpip. looks like it would already work on tun, but im not sure what the point of that is. ok? Index: net/if.c =================================================================== RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.625 diff -u -p -r1.625 if.c --- net/if.c 18 Jan 2021 09:55:43 -0000 1.625 +++ net/if.c 14 Feb 2021 12:12:21 -0000 @@ -847,13 +847,17 @@ if_vinput(struct ifnet *ifp, struct mbuf m->m_pkthdr.ph_ifidx = ifp->if_index; m->m_pkthdr.ph_rtableid = ifp->if_rdomain; +#if NPF > 0 + pf_pkt_addr_changed(m); +#endif + counters_pkt(ifp->if_counters, ifc_ipackets, ifc_ibytes, m->m_pkthdr.len); #if NBPFILTER > 0 if_bpf = ifp->if_bpf; if (if_bpf) { - if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN)) { + if (ifp->if_bpf_mtap(if_bpf, m, BPF_DIRECTION_IN)) { m_freem(m); return; } @@ -1497,6 +1501,42 @@ p2p_rtrequest(struct ifnet *ifp, int req } } +int +p2p_bpf_mtap(caddr_t if_bpf, const struct mbuf *m, u_int dir) +{ +#if NBPFILTER > 0 + return (bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family, m, dir)); +#else + return (0); +#endif +} + +void +p2p_input(struct ifnet *ifp, struct mbuf *m) +{ + void (*input)(struct ifnet *, struct mbuf *); + + switch (m->m_pkthdr.ph_family) { + case AF_INET: + input = ipv4_input; + break; +#ifdef INET6 + case AF_INET6: + input = ipv6_input; + break; +#endif +#ifdef MPLS + case AF_MPLS: + input = mpls_input; + break; +#endif + default: + m_freem(m); + return; + } + + (*input)(ifp, m); +} /* * Bring down all interfaces Index: net/if_ethersubr.c 
=================================================================== RCS file: /cvs/src/sys/net/if_ethersubr.c,v retrieving revision 1.268 diff -u -p -r1.268 if_ethersubr.c --- net/if_ethersubr.c 4 Jan 2021 21:21:41 -0000 1.268 +++ net/if_ethersubr.c 14 Feb 2021 12:12:21 -0000 @@ -680,7 +680,9 @@ ether_ifattach(struct ifnet *ifp) if_alloc_sadl(ifp); memcpy(LLADDR(ifp->if_sadl), ac->ac_enaddr, ifp->if_addrlen); LIST_INIT(&ac->ac_multiaddrs); + #if NBPFILTER > 0 + ifp->if_bpf_mtap = bpf_mtap_ether; bpfattach(&ifp->if_bpf, ifp, DLT_EN10MB, ETHER_HDR_LEN); #endif } Index: net/if_gif.c =================================================================== RCS file: /cvs/src/sys/net/if_gif.c,v retrieving revision 1.131 diff -u -p -r1.131 if_gif.c --- net/if_gif.c 21 Aug 2020 22:59:27 -0000 1.131 +++ net/if_gif.c 14 Feb 2021 12:12:21 -0000 @@ -166,6 +166,8 @@ gif_clone_create(struct if_clone *ifc, i ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST; ifp->if_xflags = IFXF_CLONED; ifp->if_ioctl = gif_ioctl; + ifp->if_bpf_mtap = p2p_bpf_mtap; + ifp->if_input = p2p_input; ifp->if_start = gif_start; ifp->if_output = gif_output; ifp->if_rtrequest = p2p_rtrequest; @@ -174,6 +176,7 @@ gif_clone_create(struct if_clone *ifc, i if_attach(ifp); if_alloc_sadl(ifp); + if_counters_alloc(ifp); #if NBPFILTER > 0 bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t)); @@ -787,7 +790,6 @@ gif_input(struct gif_tunnel *key, struct struct mbuf *m = *mp; struct gif_softc *sc; struct ifnet *ifp; - void (*input)(struct ifnet *, struct mbuf *); uint8_t itos; int rxhprio; @@ -830,7 +832,6 @@ gif_input(struct gif_tunnel *key, struct ip_tos_patch(ip, itos); m->m_pkthdr.ph_family = AF_INET; - input = ipv4_input; break; } #ifdef INET6 @@ -851,7 +852,6 @@ gif_input(struct gif_tunnel *key, struct SET(ip6->ip6_flow, htonl(itos << 20)); m->m_pkthdr.ph_family = AF_INET6; - input = ipv6_input; break; } #endif /* INET6 */ @@ -866,7 +866,6 @@ gif_input(struct gif_tunnel *key, struct itos = (ntohl(shim) >> 
MPLS_EXP_OFFSET) << 5; m->m_pkthdr.ph_family = AF_MPLS; - input = mpls_input; break; } #endif /* MPLS */ @@ -875,8 +874,6 @@ gif_input(struct gif_tunnel *key, struct } m->m_flags &= ~(M_MCAST|M_BCAST); - m->m_pkthdr.ph_ifidx = ifp->if_index; - m->m_pkthdr.ph_rtableid = ifp->if_rdomain; switch (rxhprio) { case IF_HDRPRIO_PACKET: @@ -893,25 +890,8 @@ gif_input(struct gif_tunnel *key, struct break; } -#if NPF > 0 - pf_pkt_addr_changed(m); -#endif - - ifp->if_ipackets++; - ifp->if_ibytes += m->m_pkthdr.len; - -#if NBPFILTER > 0 - { - caddr_t if_bpf = ifp->if_bpf; - if (if_bpf) { - bpf_mtap_af(ifp->if_bpf, m->m_pkthdr.ph_family, - m, BPF_DIRECTION_IN); - } - } -#endif - *mp = NULL; - (*input)(ifp, m); + if_vinput(ifp, m); return (IPPROTO_DONE); drop: Index: net/if_gre.c =================================================================== RCS file: /cvs/src/sys/net/if_gre.c,v retrieving revision 1.164 diff -u -p -r1.164 if_gre.c --- net/if_gre.c 19 Jan 2021 07:31:47 -0000 1.164 +++ net/if_gre.c 14 Feb 2021 12:12:21 -0000 @@ -590,6 +590,8 @@ gre_clone_create(struct if_clone *ifc, i ifp->if_mtu = GREMTU; ifp->if_flags = IFF_POINTOPOINT|IFF_MULTICAST; ifp->if_xflags = IFXF_CLONED; + ifp->if_bpf_mtap = p2p_bpf_mtap; + ifp->if_input = p2p_input; ifp->if_output = gre_output; ifp->if_start = gre_start; ifp->if_ioctl = gre_ioctl; @@ -659,6 +661,8 @@ mgre_clone_create(struct if_clone *ifc, ifp->if_mtu = GREMTU; ifp->if_flags = IFF_MULTICAST|IFF_SIMPLEX; ifp->if_xflags = IFXF_CLONED; + ifp->if_bpf_mtap = p2p_bpf_mtap; + ifp->if_input = p2p_input; ifp->if_rtrequest = mgre_rtrequest; ifp->if_output = mgre_output; ifp->if_start = mgre_start; @@ -1006,12 +1010,8 @@ gre_input_key(struct mbuf **mp, int *off caddr_t buf; struct gre_header *gh; struct gre_h_key *gkh; - void (*input)(struct ifnet *, struct mbuf *); struct mbuf *(*patch)(const struct gre_tunnel *, struct mbuf *, uint8_t *, uint8_t); -#if NBPFILTER > 0 - int bpf_af = AF_UNSPEC; /* bpf */ -#endif int mcast = 0; uint8_t itos; 
@@ -1114,19 +1114,13 @@ gre_input_key(struct mbuf **mp, int *off /* FALLTHROUGH */ } case htons(ETHERTYPE_IP): -#if NBPFILTER > 0 - bpf_af = AF_INET; -#endif + m->m_pkthdr.ph_family = AF_INET; patch = gre_ipv4_patch; - input = ipv4_input; break; #ifdef INET6 case htons(ETHERTYPE_IPV6): -#if NBPFILTER > 0 - bpf_af = AF_INET6; -#endif + m->m_pkthdr.ph_family = AF_INET6; patch = gre_ipv6_patch; - input = ipv6_input; break; #endif #ifdef MPLS @@ -1134,11 +1128,8 @@ gre_input_key(struct mbuf **mp, int *off mcast = M_MCAST|M_BCAST; /* fallthrough */ case htons(ETHERTYPE_MPLS): -#if NBPFILTER > 0 - bpf_af = AF_MPLS; -#endif + m->m_pkthdr.ph_family = AF_MPLS; patch = gre_mpls_patch; - input = mpls_input; break; #endif case htons(0): @@ -1189,22 +1180,8 @@ gre_input_key(struct mbuf **mp, int *off m->m_flags &= ~(M_MCAST|M_BCAST); m->m_flags |= mcast; - m->m_pkthdr.ph_ifidx = ifp->if_index; - m->m_pkthdr.ph_rtableid = ifp->if_rdomain; - -#if NPF > 0 - pf_pkt_addr_changed(m); -#endif - - counters_pkt(ifp->if_counters, - ifc_ipackets, ifc_ibytes, m->m_pkthdr.len); - -#if NBPFILTER > 0 - if (ifp->if_bpf) - bpf_mtap_af(ifp->if_bpf, bpf_af, m, BPF_DIRECTION_IN); -#endif - (*input)(ifp, m); + if_vinput(ifp, m); return (IPPROTO_DONE); decline: *mp = m; @@ -1333,10 +1310,6 @@ egre_input(const struct gre_tunnel *key, m->m_flags &= ~(M_MCAST|M_BCAST); -#if NPF > 0 - pf_pkt_addr_changed(m); -#endif - gre_l2_prio(&sc->sc_tunnel, m, otos); if_vinput(&sc->sc_ac.ac_if, m); @@ -1581,13 +1554,9 @@ nvgre_input(const struct gre_tunnel *key SET(m->m_pkthdr.csum_flags, M_FLOWID); m->m_pkthdr.ph_flowid = bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY; - gre_l2_prio(&sc->sc_tunnel, m, otos); - m->m_flags &= ~(M_MCAST|M_BCAST); -#if NPF > 0 - pf_pkt_addr_changed(m); -#endif + gre_l2_prio(&sc->sc_tunnel, m, otos); if_vinput(&sc->sc_ac.ac_if, m); @@ -4213,13 +4182,9 @@ eoip_input(struct gre_tunnel *key, struc if (m->m_pkthdr.len != len) m_adj(m, len - m->m_pkthdr.len); - gre_l2_prio(&sc->sc_tunnel, m, 
otos); - m->m_flags &= ~(M_MCAST|M_BCAST); -#if NPF > 0 - pf_pkt_addr_changed(m); -#endif + gre_l2_prio(&sc->sc_tunnel, m, otos); if_vinput(&sc->sc_ac.ac_if, m); Index: net/if_mpe.c =================================================================== RCS file: /cvs/src/sys/net/if_mpe.c,v retrieving revision 1.97 diff -u -p -r1.97 if_mpe.c --- net/if_mpe.c 21 Aug 2020 22:59:27 -0000 1.97 +++ net/if_mpe.c 14 Feb 2021 12:12:21 -0000 @@ -108,6 +108,8 @@ mpe_clone_create(struct if_clone *ifc, i ifp->if_softc = sc; ifp->if_mtu = MPE_MTU; ifp->if_ioctl = mpe_ioctl; + ifp->if_bpf_mtap = p2p_bpf_mtap; + ifp->if_input = p2p_input; ifp->if_output = mpe_output; ifp->if_start = mpe_start; ifp->if_type = IFT_MPLS; @@ -117,6 +119,8 @@ mpe_clone_create(struct if_clone *ifc, i if_attach(ifp); if_alloc_sadl(ifp); + if_counters_alloc(ifp); + #if NBPFILTER > 0 bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t)); #endif @@ -454,7 +458,6 @@ mpe_input(struct ifnet *ifp, struct mbuf struct mbuf *n; uint8_t ttl, tos; uint32_t exp; - void (*input)(struct ifnet *, struct mbuf *); int rxprio = sc->sc_rxhprio; shim = mtod(m, struct shim_hdr *); @@ -488,7 +491,7 @@ mpe_input(struct ifnet *ifp, struct mbuf if (m == NULL) return; } - input = ipv4_input; + m->m_pkthdr.ph_family = AF_INET; break; } @@ -510,7 +513,7 @@ mpe_input(struct ifnet *ifp, struct mbuf if (m == NULL) return; } - input = ipv6_input; + m->m_pkthdr.ph_family = AF_INET6; break; } @@ -534,21 +537,7 @@ mpe_input(struct ifnet *ifp, struct mbuf break; } - /* new receive if and move into correct rtable */ - m->m_pkthdr.ph_ifidx = ifp->if_index; - m->m_pkthdr.ph_rtableid = ifp->if_rdomain; - - /* packet has not been processed by PF yet. 
*/ - KASSERT(m->m_pkthdr.pf.statekey == NULL); - -#if NBPFILTER > 0 - if (ifp->if_bpf) { - bpf_mtap_af(ifp->if_bpf, m->m_pkthdr.ph_family, - m, BPF_DIRECTION_IN); - } -#endif - - (*input)(ifp, m); + if_vinput(ifp, m); return; drop: m_freem(m); Index: net/if_mpip.c =================================================================== RCS file: /cvs/src/sys/net/if_mpip.c,v retrieving revision 1.12 diff -u -p -r1.12 if_mpip.c --- net/if_mpip.c 21 Aug 2020 22:59:27 -0000 1.12 +++ net/if_mpip.c 14 Feb 2021 12:12:21 -0000 @@ -112,6 +112,8 @@ mpip_clone_create(struct if_clone *ifc, ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST; ifp->if_xflags = IFXF_CLONED; ifp->if_ioctl = mpip_ioctl; + ifp->if_bpf_mtap = p2p_bpf_mtap; + ifp->if_input = p2p_input; ifp->if_output = mpip_output; ifp->if_start = mpip_start; ifp->if_rtrequest = p2p_rtrequest; @@ -460,7 +462,6 @@ mpip_input(struct mpip_softc *sc, struct uint32_t shim, exp; struct mbuf *n; uint8_t ttl, tos; - void (*input)(struct ifnet *, struct mbuf *); if (!ISSET(ifp->if_flags, IFF_RUNNING)) goto drop; @@ -552,7 +553,7 @@ mpip_input(struct mpip_softc *sc, struct if (m == NULL) return; } - input = ipv4_input; + m->m_pkthdr.ph_family = AF_INET; break; } @@ -574,7 +575,7 @@ mpip_input(struct mpip_softc *sc, struct if (m == NULL) return; } - input = ipv6_input; + m->m_pkthdr.ph_family = AF_INET6; break; } @@ -599,23 +600,7 @@ mpip_input(struct mpip_softc *sc, struct break; } - m->m_pkthdr.ph_ifidx = ifp->if_index; - m->m_pkthdr.ph_rtableid = ifp->if_rdomain; - - /* packet has not been processed by PF yet. 
*/ - KASSERT(m->m_pkthdr.pf.statekey == NULL); - -#if NBPFILTER > 0 - { - caddr_t if_bpf = ifp->if_bpf; - if (if_bpf) { - bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family, - m, BPF_DIRECTION_IN); - } - } -#endif - - (*input)(ifp, m); + if_vinput(ifp, m); return; drop: m_freem(m); Index: net/if_tun.c =================================================================== RCS file: /cvs/src/sys/net/if_tun.c,v retrieving revision 1.229 diff -u -p -r1.229 if_tun.c --- net/if_tun.c 19 Jan 2021 19:39:58 -0000 1.229 +++ net/if_tun.c 14 Feb 2021 12:12:21 -0000 @@ -239,6 +239,7 @@ tun_create(struct if_clone *ifc, int uni if_counters_alloc(ifp); if ((flags & TUN_LAYER2) == 0) { + ifp->if_bpf_mtap = bpf_mtap; ifp->if_input = tun_input; ifp->if_output = tun_output; ifp->if_mtu = ETHERMTU; Index: net/if_var.h =================================================================== RCS file: /cvs/src/sys/net/if_var.h,v retrieving revision 1.112 diff -u -p -r1.112 if_var.h --- net/if_var.h 29 Jul 2020 12:09:31 -0000 1.112 +++ net/if_var.h 14 Feb 2021 12:12:21 -0000 @@ -161,6 +161,7 @@ struct ifnet { /* and the entries */ /* procedure handles */ void (*if_input)(struct ifnet *, struct mbuf *); + int (*if_bpf_mtap)(caddr_t, const struct mbuf *, u_int); int (*if_output)(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); /* output routine (enqueue) */ /* link level output function */ @@ -326,6 +327,8 @@ int if_input_local(struct ifnet *, struc int if_output_local(struct ifnet *, struct mbuf *, sa_family_t); void if_rtrequest_dummy(struct ifnet *, int, struct rtentry *); void p2p_rtrequest(struct ifnet *, int, struct rtentry *); +void p2p_input(struct ifnet *, struct mbuf *); +int p2p_bpf_mtap(caddr_t, const struct mbuf *, u_int); struct ifaddr *ifa_ifwithaddr(struct sockaddr *, u_int); struct ifaddr *ifa_ifwithdstaddr(struct sockaddr *, u_int); Index: net/ifq.c =================================================================== RCS file: /cvs/src/sys/net/ifq.c,v retrieving revision 
1.41 diff -u -p -r1.41 ifq.c --- net/ifq.c 7 Jul 2020 00:00:03 -0000 1.41 +++ net/ifq.c 14 Feb 2021 12:12:21 -0000 @@ -693,7 +693,7 @@ ifiq_input(struct ifiqueue *ifiq, struct ml_init(ml); while ((m = ml_dequeue(&ml0)) != NULL) { - if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN)) + if (ifp->if_bpf_mtap(if_bpf, m, BPF_DIRECTION_IN)) m_freem(m); else ml_enqueue(ml, m);