On Sun, Dec 23, 2018 at 01:53:48PM -0200, Martin Pieuchot wrote:
> On 20/12/18(Thu) 11:35, David Gwynne wrote:
> > My last attempt in "let etherip(4) output directly to the ip stack"
> > didn't go so well. This one at least doesn't break bridge(4) on
> > Ethernet.
> 
> It breaks it on mpw(4).

I argue that mpw(4) is broken anyway, but this would make it worse, yes.

> > This diff effectively turns if_enqueue into a per interface function
> > pointer. Things still call if_enqueue(), which does some pf stuff, and then
> > it calls ifp->if_enqueue. By default, that function pointer is
> > if_enqueue_ifq() which puts the packet on the interface send queues.
> > 
> > Ethernet interfaces intercept the call to if_enqueue_ifq(). They get set
> > up to call ether_enqueue, which does the bridge stuff and then calls
> > ac->ac_enqueue, which shadows the ifp->if_enqueue functionality.
> 
> Why do we need two abstraction layers?  It feels weird to have
> pseudo-drivers rely on a pointer in 'struct arpcom', because
> they don't necessarily need this structure.

If a pseudo-driver pretends to be Ethernet it will have arpcom. The main
reason to have two layers for Ethernet interfaces is to take bridge(4)
knowledge out of the generic interface code.

> > This takes Ethernet bridge(4) knowledge out of the generic interface
> > code. Non-Ethernet things that want to bypass ifqs can override
> > ifp->if_enqueue. Ethernet things can override ac->ac_enqueue.
> 
> Why take it out of the generic?  We spent a lot of energy putting it
> there, now you're taking the opposite direction.  So let's spread
> NBRIDGE > 1 all over the stack again?

The bridge stuff would just be in if_ethersubr.c. Hopefully.
if_isconnected() would be harder to pull apart.

> 
> > This diff includes a change to vlan(4) to take advantage of this. It
> > overrides ac_enqueue with vlan_enqueue(). So a packet sent out a vlan
> > interface will go into if_enqueue(), which will call ether_enqueue()
> > because ether_ifattach set that up. ether_enqueue() will call
> > ac->ac_enqueue which is vlan_enqueue on vlan interfaces, which will
> > eventually call if_enqueue() on the parent interface.
> 
> I like the idea behind vlan_transmit().  
> 
> > There are some things I'd like to try if this goes in. eg, it may be
> > possible to have ether_enqueue simple call ac->ac_enqueue, and let
> > bridge(4) swap it out with bridge_enqueue code. Doing that now would
> > complicate the diff though.
> 
> Why?  What's the gain?

Avoiding a conditional in a hotpath?

> > Anyway, how's this look, esp relative to the previous diff?
> 
> It looks better but I'm afraid this diff still breaks stuff, I'd
> suggest you to keep your fix simple.  I'm afraid with all the
> abstractions that you're introducing.  They look like premature
> optimizations to me.

Fair enough.

> One more comment below.
> 
> > Index: net/if.c
> > ===================================================================
> > RCS file: /cvs/src/sys/net/if.c,v
> > retrieving revision 1.569
> > diff -u -p -r1.569 if.c
> > --- net/if.c        11 Dec 2018 22:08:57 -0000      1.569
> > +++ net/if.c        19 Dec 2018 06:48:37 -0000
> > @@ -683,34 +685,33 @@ if_qstart_compat(struct ifqueue *ifq)
> >  int
> >  if_enqueue(struct ifnet *ifp, struct mbuf *m)
> >  {
> > -   unsigned int idx;
> > -   struct ifqueue *ifq;
> > -   int error;
> > -
> >  #if NPF > 0
> >     if (m->m_pkthdr.pf.delay > 0)
> >             return (pf_delay_pkt(m, ifp->if_index));
> > -#endif
> > -
> > -#if NBRIDGE > 0
> > -   if (ifp->if_bridgeport && (m->m_flags & M_PROTO1) == 0) {
> > -           KERNEL_LOCK();
> > -           error = bridge_output(ifp, m, NULL, NULL);
> > -           KERNEL_UNLOCK();
> > -           return (error);
> > -   }
> > -#endif
> >  
> > -#if NPF > 0
> >     pf_pkt_addr_changed(m);
> >  #endif     /* NPF > 0 */
> 
> Are you sure calling pf_pkt_addr_changed() before bridge_output() won't
> create any unwanted side effect?  In any case I'd suggest keeping this
> separated, to be able to revert it easily :D

bridge calls if_enqueue to send a packet out, so it'll get
pf_pkt_addr_changed() called on the new interface either way. However,
policy via pf on bridge would be affected.

Unpreoptimising the code gets us back to the original order though.

How's this look?

Index: if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.569
diff -u -p -r1.569 if.c
--- if.c        11 Dec 2018 22:08:57 -0000      1.569
+++ if.c        30 Dec 2018 02:56:06 -0000
@@ -632,6 +632,8 @@ if_attach_common(struct ifnet *ifp)
 
        if (ifp->if_rtrequest == NULL)
                ifp->if_rtrequest = if_rtrequest_dummy;
+       if (ifp->if_enqueue == NULL)
+               ifp->if_enqueue = if_enqueue_ifq;
        ifp->if_llprio = IFQ_DEFPRIO;
 
        SRPL_INIT(&ifp->if_inputs);
@@ -683,10 +685,6 @@ if_qstart_compat(struct ifqueue *ifq)
 int
 if_enqueue(struct ifnet *ifp, struct mbuf *m)
 {
-       unsigned int idx;
-       struct ifqueue *ifq;
-       int error;
-
 #if NPF > 0
        if (m->m_pkthdr.pf.delay > 0)
                return (pf_delay_pkt(m, ifp->if_index));
@@ -694,6 +692,8 @@ if_enqueue(struct ifnet *ifp, struct mbu
 
 #if NBRIDGE > 0
        if (ifp->if_bridgeport && (m->m_flags & M_PROTO1) == 0) {
+               int error;
+
                KERNEL_LOCK();
                error = bridge_output(ifp, m, NULL, NULL);
                KERNEL_UNLOCK();
@@ -705,12 +705,26 @@ if_enqueue(struct ifnet *ifp, struct mbu
        pf_pkt_addr_changed(m);
 #endif /* NPF > 0 */
 
-       /*
-        * use the operations on the first ifq to pick which of the array
-        * gets this mbuf.
-        */
-       idx = ifq_idx(&ifp->if_snd, ifp->if_nifqs, m);
-       ifq = ifp->if_ifqs[idx];
+       return ((*ifp->if_enqueue)(ifp, m));
+}
+
+int
+if_enqueue_ifq(struct ifnet *ifp, struct mbuf *m)
+{
+       struct ifqueue *ifq = &ifp->if_snd;
+       int error;
+
+       if (ifp->if_nifqs > 1) {
+               unsigned int idx;
+
+               /*
+                * use the operations on the first ifq to pick which of
+                * the array gets this mbuf.
+                */
+
+               idx = ifq_idx(&ifp->if_snd, ifp->if_nifqs, m);
+               ifq = ifp->if_ifqs[idx];
+       }
 
        error = ifq_enqueue(ifq, m);
        if (error)
Index: if_var.h
===================================================================
RCS file: /cvs/src/sys/net/if_var.h,v
retrieving revision 1.92
diff -u -p -r1.92 if_var.h
--- if_var.h    19 Dec 2018 05:31:28 -0000      1.92
+++ if_var.h    30 Dec 2018 02:56:06 -0000
@@ -162,6 +162,7 @@ struct ifnet {                              /* and the 
entries */
                                        /* link level output function */
        int     (*if_ll_output)(struct ifnet *, struct mbuf *,
                    struct sockaddr *, struct rtentry *);
+       int     (*if_enqueue)(struct ifnet *, struct mbuf *);
        void    (*if_start)(struct ifnet *);    /* initiate output */
        int     (*if_ioctl)(struct ifnet *, u_long, caddr_t); /* ioctl hook */
        void    (*if_watchdog)(struct ifnet *); /* timer routine */
@@ -331,6 +332,7 @@ extern struct ifnet_head ifnet;
 
 void   if_start(struct ifnet *);
 int    if_enqueue(struct ifnet *, struct mbuf *);
+int    if_enqueue_ifq(struct ifnet *, struct mbuf *);
 void   if_input(struct ifnet *, struct mbuf_list *);
 void   if_input_process(struct ifnet *, struct mbuf_list *);
 int    if_input_local(struct ifnet *, struct mbuf *, sa_family_t);
Index: if_vlan.c
===================================================================
RCS file: /cvs/src/sys/net/if_vlan.c,v
retrieving revision 1.179
diff -u -p -r1.179 if_vlan.c
--- if_vlan.c   16 Nov 2018 08:43:08 -0000      1.179
+++ if_vlan.c   30 Dec 2018 02:56:06 -0000
@@ -58,6 +58,7 @@
 #include <sys/sockio.h>
 #include <sys/systm.h>
 #include <sys/rwlock.h>
+#include <sys/percpu.h>
 
 #include <net/if.h>
 #include <net/if_dl.h>
@@ -85,6 +86,7 @@ int   vlan_clone_create(struct if_clone *,
 int    vlan_clone_destroy(struct ifnet *);
 
 int    vlan_input(struct ifnet *, struct mbuf *, void *);
+int    vlan_enqueue(struct ifnet *, struct mbuf *);
 void   vlan_start(struct ifqueue *ifq);
 int    vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr);
 
@@ -178,9 +180,12 @@ vlan_clone_create(struct if_clone *ifc, 
        ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST;
        ifp->if_xflags = IFXF_CLONED|IFXF_MPSAFE;
        ifp->if_qstart = vlan_start;
+       ifp->if_enqueue = vlan_enqueue;
        ifp->if_ioctl = vlan_ioctl;
        ifp->if_hardmtu = 0xffff;
        ifp->if_link_state = LINK_STATE_DOWN;
+
+       if_counters_alloc(ifp);
        if_attach(ifp);
        ether_ifattach(ifp);
        ifp->if_hdrlen = EVL_ENCAPLEN;
@@ -240,69 +245,97 @@ vlan_mplstunnel(int ifidx)
 }
 
 void
+vlan_transmit(struct ifvlan *ifv, struct ifnet *ifp0, struct mbuf *m)
+{
+       struct ifnet *ifp = &ifv->ifv_if;
+       int txprio = ifv->ifv_prio;
+       uint8_t prio;
+
+#if NBPFILTER > 0
+       if (ifp->if_bpf)
+               bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
+#endif /* NBPFILTER > 0 */
+
+       prio = (txprio == IF_HDRPRIO_PACKET) ?
+           m->m_pkthdr.pf.prio : txprio;
+
+       /* IEEE 802.1p has prio 0 and 1 swapped */
+       if (prio <= 1)
+               prio = !prio;
+
+       /*
+        * If this packet came from a pseudowire it means it already
+        * has all tags it needs, so just output it.
+        */
+       if (vlan_mplstunnel(m->m_pkthdr.ph_ifidx)) {
+               /* NOTHING */
+
+       /*
+        * If the underlying interface cannot do VLAN tag insertion
+        * itself, create an encapsulation header.
+        */
+       } else if ((ifp0->if_capabilities & IFCAP_VLAN_HWTAGGING) &&
+           (ifv->ifv_type == ETHERTYPE_VLAN)) {
+               m->m_pkthdr.ether_vtag = ifv->ifv_tag +
+                   (prio << EVL_PRIO_BITS);
+               m->m_flags |= M_VLANTAG;
+       } else {
+               m = vlan_inject(m, ifv->ifv_type, ifv->ifv_tag |
+                   (prio << EVL_PRIO_BITS));
+               if (m == NULL) {
+                       counters_inc(ifp->if_counters, ifc_oerrors);
+                       return;
+               }
+       }
+
+       if (if_enqueue(ifp0, m))
+               counters_inc(ifp->if_counters, ifc_oerrors);
+}
+
+int
+vlan_enqueue(struct ifnet *ifp, struct mbuf *m)
+{
+       struct ifnet *ifp0;
+       struct ifvlan *ifv;
+       int error = 0;
+
+       if (!ifq_is_priq(&ifp->if_snd))
+               return (if_enqueue_ifq(ifp, m));
+
+       ifv = ifp->if_softc;
+       ifp0 = if_get(ifv->ifv_ifp0);
+
+       if (ifp0 == NULL || !ISSET(ifp0->if_flags, IFF_RUNNING)) {
+               m_freem(m);
+               error = ENETDOWN;
+       } else {
+               counters_pkt(ifp->if_counters,
+                   ifc_opackets, ifc_obytes, m->m_pkthdr.len);
+               vlan_transmit(ifv, ifp0, m);
+       }
+
+       if_put(ifp0);
+
+       return (error);
+}
+
+void
 vlan_start(struct ifqueue *ifq)
 {
        struct ifnet    *ifp = ifq->ifq_if;
-       struct ifvlan   *ifv;
+       struct ifvlan   *ifv = ifp->if_softc;
        struct ifnet    *ifp0;
        struct mbuf     *m;
-       int              txprio;
-       uint8_t          prio;
 
        ifv = ifp->if_softc;
        ifp0 = if_get(ifv->ifv_ifp0);
-       if (ifp0 == NULL || (ifp0->if_flags & (IFF_UP|IFF_RUNNING)) !=
-           (IFF_UP|IFF_RUNNING)) {
+       if (ifp0 == NULL || !ISSET(ifp0->if_flags, IFF_RUNNING)) {
                ifq_purge(ifq);
                goto leave;
        }
 
-       txprio = ifv->ifv_prio;
-
-       while ((m = ifq_dequeue(ifq)) != NULL) {
-#if NBPFILTER > 0
-               if (ifp->if_bpf)
-                       bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
-#endif /* NBPFILTER > 0 */
-
-               prio = (txprio == IF_HDRPRIO_PACKET) ?
-                   m->m_pkthdr.pf.prio : txprio;
-
-               /* IEEE 802.1p has prio 0 and 1 swapped */
-               if (prio <= 1)
-                       prio = !prio;
-
-               /*
-                * If this packet came from a pseudowire it means it already
-                * has all tags it needs, so just output it.
-                */
-               if (vlan_mplstunnel(m->m_pkthdr.ph_ifidx)) {
-                       /* NOTHING */
-
-               /*
-                * If the underlying interface cannot do VLAN tag insertion
-                * itself, create an encapsulation header.
-                */
-               } else if ((ifp0->if_capabilities & IFCAP_VLAN_HWTAGGING) &&
-                   (ifv->ifv_type == ETHERTYPE_VLAN)) {
-                       m->m_pkthdr.ether_vtag = ifv->ifv_tag +
-                           (prio << EVL_PRIO_BITS);
-                       m->m_flags |= M_VLANTAG;
-               } else {
-                       m = vlan_inject(m, ifv->ifv_type, ifv->ifv_tag |
-                           (prio << EVL_PRIO_BITS));
-                       if (m == NULL) {
-                               ifp->if_oerrors++;
-                               continue;
-                       }
-               }
-
-               if (if_enqueue(ifp0, m)) {
-                       ifp->if_oerrors++;
-                       ifq->ifq_errors++;
-                       continue;
-               }
-       }
+       while ((m = ifq_dequeue(ifq)) != NULL)
+               vlan_transmit(ifv, ifp0, m);
 
 leave:
        if_put(ifp0);

Reply via email to