while we were working on making the various pseudo interfaces you
stack on top of ethernet mpsafe, we split their input processing
off so they could be attacked one by one. they're all mpsafe now,
so this separation is not strictly necessary anymore.

this moves carp and vlan input back into ether_input. a lot of care
is taken to correctly order when we give the packets to the sub
interfaces. basically, any ethernet packet with a vlan tag is
unconditionally given to vlan_input. carp input is only attempted
if the packet is not for the parent interface, but before the
multicast handling is done. by checking the interfaces mac address
first, carp interfaces can get their packets immediately, which
means we can  stop messing around with the M_BCAST and M_MCAST flags
on carp mbufs. the relevant chunk of code is:

#if NVLAN > 0
        if (ISSET(m->m_flags, M_VLANTAG) ||
            etype == ETHERTYPE_VLAN ||
            etype == ETHERTYPE_QINQ) {
                vlan_input(ifp, ml, m);
                return;
        }
#endif

        if (memcmp(ac->ac_enaddr, eh->ether_dhost, ETHER_ADDR_LEN) != 0) {
                /* The packet doesn't match the ether addr on this iface */

#if NCARP > 0
                /* It may be addressed to a child carp iface */
                if (ifp->if_type != IFT_CARP &&
                    !SRPL_EMPTY_LOCKED(&ifp->if_carp) &&
                    carp_input(ifp, ml, m)) {
                        /* carp_input has consumed the packet */
                        return;
                }
#endif

                /* It must be multicast if it isn't for us or a child carp */
                if (!ETHER_IS_MULTICAST(eh->ether_dhost))
                        goto dropanyway;

                /* Drop it if it came from us in the first place */
                if (!ISSET(ifp->if_flags, IFF_SIMPLEX) &&
                    memcmp(ac->ac_enaddr, eh->ether_shost,
                     ETHER_ADDR_LEN) == 0)
                        goto dropanyway;

                SET(m->m_flags, (memcmp(etherbroadcastaddr, eh->ether_dhost,
                    ETHER_ADDR_LEN) == 0) ? M_BCAST : M_MCAST);
                ifp->if_imcasts++; /* XXX lock? */
        }

doing vlan and carp input here let's us remove special handling for
VLAN packets in carp code.

this diff also gets rid of the use of the pseudo interfaces input
queues, it processes their packets off an mbuf list for each real
ethernet packet. if we can tie all the work done on behalf of a
physical ring to a single task it makes rx ring moderation for
physical interfaces a lot easier to implement.

note that trunk and bridge/switch are still implemented using
interface input handlers at the moment.

ok?

Index: net/if_ethersubr.c
===================================================================
RCS file: /cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.250
diff -u -p -r1.250 if_ethersubr.c
--- net/if_ethersubr.c  10 Jan 2018 00:14:38 -0000      1.250
+++ net/if_ethersubr.c  11 Jan 2018 01:20:54 -0000
@@ -103,6 +103,16 @@ didn't get a copy, you may request one f
 #include <net/bpf.h>
 #endif
 
+#include "vlan.h"
+#if NVLAN > 0
+#include <net/if_vlan_var.h>
+#endif
+
+#include "carp.h"
+#if NCARP > 0
+#include <netinet/ip_carp.h>
+#endif
+
 #include "pppoe.h"
 #if NPPPOE > 0
 #include <net/if_pppoe.h>
@@ -121,6 +131,8 @@ didn't get a copy, you may request one f
 #include <netmpls/mpls.h>
 #endif /* MPLS */
 
+void   ether_input_m(struct ifnet *, struct mbuf_list *, struct mbuf *);
+
 u_int8_t etherbroadcastaddr[ETHER_ADDR_LEN] =
     { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 u_int8_t etheranyaddr[ETHER_ADDR_LEN] =
@@ -306,69 +318,108 @@ bad:
        return (error);
 }
 
-/*
- * Process a received Ethernet packet;
- * the packet is in the mbuf chain m without
- * the ether header, which is provided separately.
- */
+void
+ether_enqueue(struct ifnet *ifp, struct mbuf_list *ml, struct mbuf *m)
+{
+#if NBPFILTER > 0
+       caddr_t if_bpf;
+#endif
+
+       m->m_pkthdr.ph_ifidx = ifp->if_index;
+       m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
+
+       /* XXX lock? */
+       ifp->if_ipackets++;
+       ifp->if_ibytes += m->m_pkthdr.len;
+
+#if NBPFILTER > 0
+       if_bpf = ifp->if_bpf;
+       if (if_bpf) {
+               if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN)) {
+                       m_freem(m);
+                       return;
+               }
+       }
+#endif
+
+       ml_enqueue(ml, m);
+}
+
 int
 ether_input(struct ifnet *ifp, struct mbuf *m, void *cookie)
 {
+       struct mbuf_list ml = MBUF_LIST_INITIALIZER();
+
+       /* Drop short frames */
+       if (m->m_len < ETHER_HDR_LEN) {
+               m_freem(m);
+               return (1);
+       }
+
+       /* We have a reference to this ifp already */
+       ether_input_m(ifp, &ml, m);
+
+       /* Run the packet through any child interfaces */
+       while ((m = ml_dequeue(&ml)) != NULL) {
+               ifp = if_get(m->m_pkthdr.ph_ifidx);
+               if (ifp != NULL)
+                       ether_input_m(ifp, &ml, m);
+               else
+                       m_freem(m);
+               if_put(ifp);
+       }
+
+       return (1);
+}
+
+void
+ether_input_m(struct ifnet *ifp, struct mbuf_list *ml, struct mbuf *m)
+{
        struct ether_header *eh;
        void (*input)(struct ifnet *, struct mbuf *);
        u_int16_t etype;
        struct arpcom *ac;
 
-       /* Drop short frames */
-       if (m->m_len < ETHER_HDR_LEN)
-               goto dropanyway;
-
        ac = (struct arpcom *)ifp;
        eh = mtod(m, struct ether_header *);
+       etype = ntohs(eh->ether_type);
 
-       if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
-               /*
-                * If this is not a simplex interface, drop the packet
-                * if it came from us.
-                */
-               if ((ifp->if_flags & IFF_SIMPLEX) == 0) {
-                       if (memcmp(ac->ac_enaddr, eh->ether_shost,
-                           ETHER_ADDR_LEN) == 0) {
-                               m_freem(m);
-                               return (1);
-                       }
-               }
-
-               if (memcmp(etherbroadcastaddr, eh->ether_dhost,
-                   sizeof(etherbroadcastaddr)) == 0)
-                       m->m_flags |= M_BCAST;
-               else
-                       m->m_flags |= M_MCAST;
-               ifp->if_imcasts++;
+#if NVLAN > 0
+       if (ISSET(m->m_flags, M_VLANTAG) ||
+           etype == ETHERTYPE_VLAN ||
+           etype == ETHERTYPE_QINQ) {
+               vlan_input(ifp, ml, m);
+               return;
        }
+#endif
 
-       /*
-        * HW vlan tagged packets that were not collected by vlan(4) must
-        * be dropped now.
-        */
-       if (m->m_flags & M_VLANTAG) {
-               m_freem(m);
-               return (1);
-       }
+       if (memcmp(ac->ac_enaddr, eh->ether_dhost, ETHER_ADDR_LEN) != 0) {
+               /* The packet doesn't match the ether addr on this iface */
 
-       /*
-        * If packet is unicast, make sure it is for us.  Drop otherwise.
-        * This check is required in promiscous mode, and for some hypervisors
-        * where the MAC filter is 'best effort' only.
-        */
-       if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
-               if (memcmp(ac->ac_enaddr, eh->ether_dhost, ETHER_ADDR_LEN)) {
-                       m_freem(m);
-                       return (1);
+#if NCARP > 0
+               /* It may be addressed to a child carp iface */
+               if (ifp->if_type != IFT_CARP &&
+                   !SRPL_EMPTY_LOCKED(&ifp->if_carp) &&
+                   carp_input(ifp, ml, m)) {
+                       /* carp_input has consumed the packet */
+                       return;
                }
-       }
+#endif
 
-       etype = ntohs(eh->ether_type);
+               /* It must be multicast if it isn't for us or a child carp */
+               if (!ETHER_IS_MULTICAST(eh->ether_dhost))
+                       goto dropanyway;
+
+               /* Drop it if it came from us in the first place */
+               if (!ISSET(ifp->if_flags, IFF_SIMPLEX) &&
+                   memcmp(ac->ac_enaddr, eh->ether_shost,
+                     ETHER_ADDR_LEN) == 0)
+                       goto dropanyway;
+
+               SET(m->m_flags, (memcmp(etherbroadcastaddr, eh->ether_dhost,
+                   ETHER_ADDR_LEN) == 0) ? M_BCAST : M_MCAST);
+               ifp->if_imcasts++; /* XXX lock? */
+       }
 
        switch (etype) {
        case ETHERTYPE_IP:
@@ -406,7 +457,7 @@ ether_input(struct ifnet *ifp, struct mb
 
                        if ((session = pipex_pppoe_lookup_session(m)) != NULL) {
                                pipex_pppoe_input(m, session);
-                               return (1);
+                               return;
                        }
                }
 #endif
@@ -414,7 +465,7 @@ ether_input(struct ifnet *ifp, struct mb
                        niq_enqueue(&pppoediscinq, m);
                else
                        niq_enqueue(&pppoeinq, m);
-               return (1);
+               return;
 #endif
 #ifdef MPLS
        case ETHERTYPE_MPLS:
@@ -428,10 +479,9 @@ ether_input(struct ifnet *ifp, struct mb
 
        m_adj(m, sizeof(*eh));
        (*input)(ifp, m);
-       return (1);
+       return;
 dropanyway:
        m_freem(m);
-       return (1);
 }
 
 /*
Index: net/if_vlan.c
===================================================================
RCS file: /cvs/src/sys/net/if_vlan.c,v
retrieving revision 1.175
diff -u -p -r1.175 if_vlan.c
--- net/if_vlan.c       9 Jan 2018 15:24:24 -0000       1.175
+++ net/if_vlan.c       11 Jan 2018 01:20:55 -0000
@@ -84,7 +84,6 @@ void  vlanattach(int count);
 int    vlan_clone_create(struct if_clone *, int);
 int    vlan_clone_destroy(struct ifnet *);
 
-int    vlan_input(struct ifnet *, struct mbuf *, void *);
 void   vlan_start(struct ifqueue *ifq);
 int    vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr);
 
@@ -325,11 +324,8 @@ vlan_inject(struct mbuf *m, uint16_t typ
        return (m);
  }
 
-/*
- * vlan_input() returns 1 if it has consumed the packet, 0 otherwise.
- */
-int
-vlan_input(struct ifnet *ifp0, struct mbuf *m, void *cookie)
+void
+vlan_input(struct ifnet *ifp0, struct mbuf_list *ml, struct mbuf *m)
 {
        struct ifvlan                   *ifv;
        struct ether_vlan_header        *evl;
@@ -337,28 +333,22 @@ vlan_input(struct ifnet *ifp0, struct mb
        SRPL_HEAD(, ifvlan)             *tagh, *list;
        struct srp_ref                   sr;
        u_int                            tag;
-       struct mbuf_list                 ml = MBUF_LIST_INITIALIZER();
        u_int16_t                        etype;
 
-       eh = mtod(m, struct ether_header *);
-       etype = ntohs(eh->ether_type);
-
        if (m->m_flags & M_VLANTAG) {
                etype = ETHERTYPE_VLAN;
                tagh = vlan_tagh;
-       } else if ((etype == ETHERTYPE_VLAN) || (etype == ETHERTYPE_QINQ)) {
+       } else {
                if (m->m_len < sizeof(*evl) &&
                    (m = m_pullup(m, sizeof(*evl))) == NULL) {
                        ifp0->if_ierrors++;
-                       return (1);
+                       return;
                }
 
                evl = mtod(m, struct ether_vlan_header *);
                m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
+               etype = ntohs(evl->evl_encap_proto);
                tagh = etype == ETHERTYPE_QINQ ? svlan_tagh : vlan_tagh;
-       } else {
-               /* Skip non-VLAN packets. */
-               return (0);
        }
 
        /* From now on ether_vtag is fine */
@@ -376,15 +366,11 @@ vlan_input(struct ifnet *ifp0, struct mb
                        break;
        }
 
-       if (ifv == NULL) {
+       if (ifv == NULL || !ISSET(ifv->ifv_if.if_flags, IFF_RUNNING)) {
                ifp0->if_noproto++;
                goto drop;
        }
 
-       if ((ifv->ifv_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
-           (IFF_UP|IFF_RUNNING))
-               goto drop;
-
        /*
         * Having found a valid vlan interface corresponding to
         * the given source interface and vlan tag, remove the
@@ -393,20 +379,19 @@ vlan_input(struct ifnet *ifp0, struct mb
        if (m->m_flags & M_VLANTAG) {
                m->m_flags &= ~M_VLANTAG;
        } else {
+               eh = mtod(m, struct ether_header *);
                eh->ether_type = evl->evl_proto;
                memmove((char *)eh + EVL_ENCAPLEN, eh, sizeof(*eh));
                m_adj(m, EVL_ENCAPLEN);
        }
 
-       ml_enqueue(&ml, m);
-       if_input(&ifv->ifv_if, &ml);
+       ether_enqueue(&ifv->ifv_if, ml, m);
        SRPL_LEAVE(&sr);
-       return (1);
+       return;
 
 drop:
        SRPL_LEAVE(&sr);
        m_freem(m);
-       return (1);
 }
 
 int
@@ -430,8 +415,6 @@ vlan_parent_up(struct ifvlan *ifv, struc
 
        vlan_multi_apply(ifv, ifp0, SIOCADDMULTI);
 
-       if_ih_insert(ifp0, vlan_input, NULL);
-
        return (0);
 }
 
@@ -549,7 +532,6 @@ vlan_down(struct ifvlan *ifv)
 
        ifp0 = if_get(ifv->ifv_ifp0);
        if (ifp0 != NULL) {
-               if_ih_remove(ifp0, vlan_input, NULL);
                if (ISSET(ifv->ifv_flags, IFVF_PROMISC))
                        ifpromisc(ifp0, 0);
                vlan_multi_apply(ifv, ifp0, SIOCDELMULTI);
Index: net/if_vlan_var.h
===================================================================
RCS file: /cvs/src/sys/net/if_vlan_var.h,v
retrieving revision 1.37
diff -u -p -r1.37 if_vlan_var.h
--- net/if_vlan_var.h   24 Jan 2017 10:08:30 -0000      1.37
+++ net/if_vlan_var.h   11 Jan 2018 01:20:55 -0000
@@ -85,6 +85,7 @@ struct        ifvlan {
 #define        IFVF_LLADDR     0x02    /* don't inherit the parents mac */
 
 struct mbuf    *vlan_inject(struct mbuf *, uint16_t, uint16_t);
+void            vlan_input(struct ifnet *, struct mbuf_list *, struct mbuf *);
 #endif /* _KERNEL */
 
 #endif /* _NET_IF_VLAN_VAR_H_ */
Index: netinet/if_ether.h
===================================================================
RCS file: /cvs/src/sys/netinet/if_ether.h,v
retrieving revision 1.73
diff -u -p -r1.73 if_ether.h
--- netinet/if_ether.h  29 Nov 2016 10:09:57 -0000      1.73
+++ netinet/if_ether.h  11 Jan 2018 01:20:56 -0000
@@ -240,6 +240,7 @@ void        ether_ifattach(struct ifnet *);
 void   ether_ifdetach(struct ifnet *);
 int    ether_ioctl(struct ifnet *, struct arpcom *, u_long, caddr_t);
 int    ether_input(struct ifnet *, struct mbuf *, void *);
+void   ether_enqueue(struct ifnet *, struct mbuf_list *, struct mbuf *);
 int    ether_output(struct ifnet *,
            struct mbuf *, struct sockaddr *, struct rtentry *);
 void   ether_rtrequest(struct ifnet *, int, struct rtentry *);
Index: netinet/ip_carp.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.324
diff -u -p -r1.324 ip_carp.c
--- netinet/ip_carp.c   11 Jan 2018 00:14:15 -0000      1.324
+++ netinet/ip_carp.c   11 Jan 2018 01:20:56 -0000
@@ -207,7 +207,6 @@ void        carp_hmac_generate(struct carp_vhos
            unsigned char *, u_int8_t);
 int    carp_hmac_verify(struct carp_vhost_entry *, u_int32_t *,
            unsigned char *);
-int    carp_input(struct ifnet *, struct mbuf *, void *);
 void   carp_proto_input_c(struct ifnet *, struct mbuf *,
            struct carp_header *, int, sa_family_t);
 int    carp_proto_input_if(struct ifnet *, struct mbuf **, int *, int);
@@ -926,9 +925,6 @@ carpdetach(struct carp_softc *sc)
 
        cif = &ifp0->if_carp;
 
-       /* Restore previous input handler. */
-       if_ih_remove(ifp0, carp_input, NULL);
-
        if (sc->lh_cookie != NULL)
                hook_disestablish(ifp0->if_linkstatehooks, sc->lh_cookie);
 
@@ -1379,58 +1375,32 @@ carp_vhe_match(struct carp_softc *sc, ui
 }
 
 int
-carp_input(struct ifnet *ifp0, struct mbuf *m, void *cookie)
+carp_input(struct ifnet *ifp0, struct mbuf_list *ml, struct mbuf *m)
 {
        struct ether_header *eh;
-       struct mbuf_list ml = MBUF_LIST_INITIALIZER();
        struct srpl *cif;
        struct carp_softc *sc;
+       struct ifnet *ifp;
        struct srp_ref sr;
 
-#if NVLAN > 0
-       /*
-        * If the underlying interface removed the VLAN header itself,
-        * it's not for us.
-        */
-       if (ISSET(m->m_flags, M_VLANTAG))
-               return (0);
-#endif
-
        eh = mtod(m, struct ether_header *);
        cif = &ifp0->if_carp;
 
        SRPL_FOREACH(sc, &sr, cif, sc_list) {
-               if ((sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
-                   (IFF_UP|IFF_RUNNING))
+               ifp = &sc->sc_if;
+
+               if (!ISSET(ifp->if_flags, IFF_RUNNING))
                        continue;
 
                if (carp_vhe_match(sc, eh->ether_dhost)) {
-                       /*
-                        * These packets look like layer 2 multicast but they
-                        * are unicast at layer 3. With help of the tag the
-                        * mbuf's M_MCAST flag can be removed by carp_lsdrop()
-                        * after we have passed layer 2.
-                        */
-                       if (sc->sc_balancing == CARP_BAL_IP) {
-                               struct m_tag *mtag;
-                               mtag = m_tag_get(PACKET_TAG_CARP_BAL_IP, 0,
-                                   M_NOWAIT);
-                               if (mtag == NULL) {
-                                       m_freem(m);
-                                       goto out;
-                               }
-                               m_tag_prepend(m, mtag);
-                       }
-                       break;
+                       ether_enqueue(ifp, ml, m);
+                       SRPL_LEAVE(&sr);
+                       return (1);
                }
        }
+       SRPL_LEAVE(&sr);
 
-       if (sc == NULL) {
-               SRPL_LEAVE(&sr);
-
-               if (!ETHER_IS_MULTICAST(eh->ether_dhost))
-                       return (0);
-
+       if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
                /*
                 * XXX Should really check the list of multicast addresses
                 * for each CARP interface _before_ copying.
@@ -1438,29 +1408,22 @@ carp_input(struct ifnet *ifp0, struct mb
                SRPL_FOREACH(sc, &sr, cif, sc_list) {
                        struct mbuf *m0;
 
-                       if (!(sc->sc_if.if_flags & IFF_UP))
+                       ifp = &sc->sc_if;
+                       if (!ISSET(ifp->if_flags, IFF_RUNNING))
                                continue;
 
+                       /* check carp_vhe_match? */
+
                        m0 = m_dup_pkt(m, ETHER_ALIGN, M_DONTWAIT);
                        if (m0 == NULL)
                                continue;
 
-                       ml_init(&ml);
-                       ml_enqueue(&ml, m0);
-
-                       if_input(&sc->sc_if, &ml);
+                       ether_enqueue(ifp, ml, m0);
                }
                SRPL_LEAVE(&sr);
-
-               return (0);
        }
 
-       ml_enqueue(&ml, m);
-       if_input(&sc->sc_if, &ml);
-out:
-       SRPL_LEAVE(&sr);
-
-       return (1);
+       return (0);
 }
 
 int
@@ -1747,9 +1710,6 @@ carp_set_ifp(struct carp_softc *sc, stru
        sc->lh_cookie = hook_establish(ifp0->if_linkstatehooks, 1,
            carp_carpdev_state, ifp0);
 
-       /* Change input handler of the physical interface. */
-       if_ih_insert(ifp0, carp_input, NULL);
-
        carp_carpdev_state(ifp0);
 
        return (0);
Index: netinet/ip_carp.h
===================================================================
RCS file: /cvs/src/sys/netinet/ip_carp.h,v
retrieving revision 1.45
diff -u -p -r1.45 ip_carp.h
--- netinet/ip_carp.h   10 Jan 2018 23:50:39 -0000      1.45
+++ netinet/ip_carp.h   11 Jan 2018 01:20:56 -0000
@@ -200,6 +200,7 @@ void                 carp_group_demote_adj(struct ifne
 int             carp6_proto_input(struct mbuf **, int *, int, int);
 int             carp_iamatch(struct ifnet *);
 int             carp_ourether(struct ifnet *, u_int8_t *);
+int             carp_input(struct ifnet *, struct mbuf_list *, struct mbuf *);
 int             carp_output(struct ifnet *, struct mbuf *, struct sockaddr *,
                     struct rtentry *);
 int             carp_sysctl(int *, u_int,  void *, size_t *, void *, size_t);


Reply via email to