On Fri, Nov 30, 2018 at 02:04:40PM -0200, Martin Pieuchot wrote:
> On 30/11/18(Fri) 12:35, David Gwynne wrote:
> > On Fri, Nov 30, 2018 at 12:21:11PM +1000, David Gwynne wrote:
> > > i have a plan to allow virtual interfaces (eg, vlan, etherip, etc) to
> > > provide their own output functions so they can bypass the ifq machinery
> > > and push the packet onto the underlying layer directly.
> > > 
> > > they'll still need to get an ethernet header though. vlan needs to get
> > > the ethernet header and put the vlan shim into it, therefore
> > > ether_resolve is exposed. etherip doesnt need a shim, it just wants
> > > ethernet encapsulating the payload before adding its own headers to the
> > > packet, therefore there is ether_encap.
> > > 
> > > does this make sense?
> > > 
> > > ok?
> > 
> > this shows vlan and etherip using the new functions.
> 
> You're still using ifq_enqueue() and ifq_dequeue(). So, I'm not sure to
> understand what is the gain of this change.

If I understood the change correctly it will bypass queueing when no HFSC
queueing is configured on that interface. If there is no traffic shaping
in use the ifp_output function will directly call the next ifp_output
function or in the case of etherip / gre /gif it will call ip_send.

IMO this is a step in the right direction.
 
> > Index: net/if_etherip.c
> > ===================================================================
> > RCS file: /cvs/src/sys/net/if_etherip.c,v
> > retrieving revision 1.40
> > diff -u -p -r1.40 if_etherip.c
> > --- net/if_etherip.c        12 Nov 2018 23:57:06 -0000      1.40
> > +++ net/if_etherip.c        30 Nov 2018 02:24:09 -0000
> > @@ -102,7 +102,10 @@ void etheripattach(int);
> >  int etherip_clone_create(struct if_clone *, int);
> >  int etherip_clone_destroy(struct ifnet *);
> >  int etherip_ioctl(struct ifnet *, u_long, caddr_t);
> > -void etherip_start(struct ifnet *);
> > +int etherip_output(struct ifnet *, struct mbuf *, struct sockaddr *,
> > +    struct rtentry *rt);
> > +void etherip_start(struct ifqueue *);
> > +void etherip_send(struct ifnet *, struct mbuf *);
> >  int etherip_media_change(struct ifnet *);
> >  void etherip_media_status(struct ifnet *, struct ifmediareq *);
> >  int etherip_set_tunnel(struct etherip_softc *, struct if_laddrreq *);
> > @@ -144,9 +147,10 @@ etherip_clone_create(struct if_clone *if
> >     ifp->if_softc = sc;
> >     ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
> >     ifp->if_ioctl = etherip_ioctl;
> > -   ifp->if_start = etherip_start;
> > +   ifp->if_output = etherip_output;
> > +   ifp->if_qstart = etherip_start;
> >     ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
> > -   ifp->if_xflags = IFXF_CLONED;
> > +   ifp->if_xflags = IFXF_MPSAFE | IFXF_CLONED;
> >     IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
> >     ifp->if_capabilities = IFCAP_VLAN_MTU;
> >     ether_fakeaddr(ifp);
> > @@ -201,40 +205,63 @@ etherip_media_status(struct ifnet *ifp, 
> >  }
> >  
> >  void
> > -etherip_start(struct ifnet *ifp)
> > +etherip_send(struct ifnet *ifp, struct mbuf *m)
> >  {
> >     struct etherip_softc *sc = ifp->if_softc;
> > -   struct mbuf *m;
> >     int error;
> > -#if NBPFILTER > 0
> > -   caddr_t if_bpf;
> > -#endif
> >  
> > -   while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
> >  #if NBPFILTER > 0
> > -           if_bpf = ifp->if_bpf;
> > -           if (if_bpf)
> > -                   bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
> > +   caddr_t if_bpf = ifp->if_bpf;
> > +   if (if_bpf)
> > +           bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
> >  #endif
> >  
> > -           switch (sc->sc_tunnel.t_af) {
> > -           case AF_INET:
> > -                   error = ip_etherip_output(ifp, m);
> > -                   break;
> > +   switch (sc->sc_tunnel.t_af) {
> > +   case AF_INET:
> > +           error = ip_etherip_output(ifp, m);
> > +           break;
> >  #ifdef INET6
> > -           case AF_INET6:
> > -                   error = ip6_etherip_output(ifp, m);
> > -                   break;
> > +   case AF_INET6:
> > +           error = ip6_etherip_output(ifp, m);
> > +           break;
> >  #endif
> > -           default:
> > -                   /* unhandled_af(sc->sc_tunnel.t_af); */
> > -                   m_freem(m);
> > -                   continue;
> > -           }
> > +   default:
> > +           /* unhandled_af(sc->sc_tunnel.t_af); */
> > +           m_freem(m);
> > +           error = ENETDOWN;
> > +           break;
> > +   }
> >  
> > -           if (error)
> > -                   ifp->if_oerrors++;
> > +   if (error)
> > +           ifp->if_oerrors++;
> > +}
> > +
> > +void
> > +etherip_start(struct ifqueue *ifq)
> > +{
> > +   struct ifnet *ifp = ifq->ifq_if;
> > +   struct mbuf *m;
> > +
> > +   while ((m = ifq_dequeue(ifq)) != NULL)
> > +           etherip_send(ifp, m);
> > +}
> > +
> > +int
> > +etherip_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
> > +    struct rtentry *rt)
> > +{
> > +   int error;
> > +
> > +   m = ether_encap(ifp, m, dst, rt, &error);
> > +   if (m == NULL)
> > +           return (error);
> > +
> > +   if (ifq_is_priq(&ifp->if_snd)) {
> > +           etherip_send(ifp, m);
> > +           return (0);
> >     }
> > +
> > +   return (ifq_enqueue(&ifp->if_snd, m));
> >  }
> >  
> >  int
> > Index: net/if_ethersubr.c
> > ===================================================================
> > RCS file: /cvs/src/sys/net/if_ethersubr.c,v
> > retrieving revision 1.253
> > diff -u -p -r1.253 if_ethersubr.c
> > --- net/if_ethersubr.c      13 Mar 2018 01:31:48 -0000      1.253
> > +++ net/if_ethersubr.c      30 Nov 2018 02:24:09 -0000
> > @@ -483,7 +510,8 @@ ether_ifattach(struct ifnet *ifp)
> >     ifp->if_addrlen = ETHER_ADDR_LEN;
> >     ifp->if_hdrlen = ETHER_HDR_LEN;
> >     ifp->if_mtu = ETHERMTU;
> > -   ifp->if_output = ether_output;
> > +   if (ifp->if_output == NULL)
> > +           ifp->if_output = ether_output;
> >     ifp->if_rtrequest = ether_rtrequest;
> >  
> >     if_ih_insert(ifp, ether_input, NULL);
> > Index: net/if_vlan.c
> > ===================================================================
> > RCS file: /cvs/src/sys/net/if_vlan.c,v
> > retrieving revision 1.179
> > diff -u -p -r1.179 if_vlan.c
> > --- net/if_vlan.c   16 Nov 2018 08:43:08 -0000      1.179
> > +++ net/if_vlan.c   30 Nov 2018 02:24:09 -0000
> > @@ -58,6 +58,7 @@
> >  #include <sys/sockio.h>
> >  #include <sys/systm.h>
> >  #include <sys/rwlock.h>
> > +#include <sys/percpu.h>
> >  
> >  #include <net/if.h>
> >  #include <net/if_dl.h>
> > @@ -85,6 +86,8 @@ int       vlan_clone_create(struct if_clone *,
> >  int        vlan_clone_destroy(struct ifnet *);
> >  
> >  int        vlan_input(struct ifnet *, struct mbuf *, void *);
> > +int        vlan_output(struct ifnet *, struct mbuf *, struct sockaddr *,
> > +       struct rtentry *rt);
> >  void       vlan_start(struct ifqueue *ifq);
> >  int        vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr);
> >  
> > @@ -177,6 +180,7 @@ vlan_clone_create(struct if_clone *ifc, 
> >  
> >     ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST;
> >     ifp->if_xflags = IFXF_CLONED|IFXF_MPSAFE;
> > +   ifp->if_output = vlan_output;
> >     ifp->if_qstart = vlan_start;
> >     ifp->if_ioctl = vlan_ioctl;
> >     ifp->if_hardmtu = 0xffff;
> > @@ -185,6 +189,8 @@ vlan_clone_create(struct if_clone *ifc, 
> >     ether_ifattach(ifp);
> >     ifp->if_hdrlen = EVL_ENCAPLEN;
> >  
> > +   if_counters_alloc(ifp);
> > +
> >     return (0);
> >  }
> >  
> > @@ -239,6 +245,110 @@ vlan_mplstunnel(int ifidx)
> >  #endif
> >  }
> >  
> > +static uint16_t
> > +vlan_tag(const struct ifvlan *ifv, const struct mbuf *m)
> > +{
> > +   int txprio = ifv->ifv_prio;
> > +   uint16_t prio;
> > +
> > +   prio = (txprio == IF_HDRPRIO_PACKET) ?
> > +       m->m_pkthdr.pf.prio : txprio;
> > +
> > +   /* IEEE 802.1p has prio 0 and 1 swapped */
> > +   if (prio <= 1)
> > +           prio = !prio;
> > +
> > +   return (ifv->if_prio | (prio << EVL_PRIO_BITS));
> > +}
> > +
> > +int
> > +vlan_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
> > +    struct rtentry *rt)
> > +{
> > +   struct ifvlan *ifv;
> > +   struct ifnet *ifp0;
> > +#if NBPFILTER > 0
> > +   caddr_t if_bpf;
> > +#endif
> > +   struct sockaddr vdst;
> > +   struct ether_header *eh = (struct ether_header *)vdst.sa_data;
> > +   int txprio;
> > +   uint16_t vtag;
> > +   int error;
> > +   unsigned int bytes;
> > +
> > +   if (!ifq_is_priq(&ifp->if_snd)) {
> > +           /*
> > +            * user wants to delay packets, which relies on the ifq
> > +            * machinery. fall back to if_enqueue via ether_output.
> > +            */
> > +           return (ether_output(ifp, m, dst, rt));
> > +   }
> > +
> > +   error = ether_resolve(ifp, m, dst, rt, eh);
> > +   switch (error) {
> > +   case 0:
> > +           break;
> > +   case EAGAIN:
> > +           return (0);
> > +   default:
> > +           return (error);
> > +   }
> > +
> > +   ifv = ifp->if_softc;
> > +   ifp0 = if_get(ifv->ifv_ifp0);
> > +   if (ifp0 == NULL || !ISSET(ifp0->if_flags, IFF_RUNNING)) {
> > +           m_freem(m);
> > +           error = ENETDOWN;
> > +           goto leave;
> > +   }
> > +
> > +#if NBPFILTER > 0
> > +   if_bpf = ifp->if_bpf;
> > +   if (if_bpf) {
> > +           bpf_mtap_hdr(if_bpf, (caddr_t)eh, sizeof(*eh), m,
> > +               BPF_DIRECTION_OUT, NULL);
> > +   }
> > +#endif
> > +
> > +   vtag = vlan_tag(ifv, m);
> > +
> > +   bytes = sizeof(*eh) + m->m_pkthdr.len; /* plus shim? */
> > +
> > +   /*
> > +    * If the underlying interface cannot do VLAN tag insertion
> > +    * itself, create an encapsulation header.
> > +    */
> > +   if (ISSET(ifp0->if_capabilities, IFCAP_VLAN_HWTAGGING) &&
> > +       ifv->ifv_type == ETHERTYPE_VLAN) {
> > +           m->m_pkthdr.ether_vtag = vtag;
> > +           m->m_flags |= M_VLANTAG;
> > +   } else {
> > +           struct ether_vlan_shim *evl;
> > +
> > +           M_PREPEND(m, sizeof(*evl), M_DONTWAIT);
> > +           if (m == NULL) {
> > +                   error = ENOBUFS;
> > +                   goto leave;
> > +           }
> > +
> > +           evl = mtod(m, struct ether_vlan_shim *);
> > +           evl->evl_tag = htons(vtag);
> > +           evl->evl_proto = eh->ether_type;
> > +
> > +           eh->ether_type = htons(ifv->ifv_type);
> > +   }
> > +
> > +   counters_pkt(ifp->if_counters, ifc_opackets, ifc_obytes, bytes); 
> > +
> > +   vdst.sa_family = pseudo_AF_HDRCMPLT;
> > +   error = ifp0->if_output(ifp0, m, &vdst, NULL);
> > +
> > +leave:
> > +   if_put(ifp0);
> > +   return (error);
> > +}
> > +
> >  void
> >  vlan_start(struct ifqueue *ifq)
> >  {
> > @@ -246,31 +356,22 @@ vlan_start(struct ifqueue *ifq)
> >     struct ifvlan   *ifv;
> >     struct ifnet    *ifp0;
> >     struct mbuf     *m;
> > -   int              txprio;
> > -   uint8_t          prio;
> > +   uint16_t         vtag;
> >  
> >     ifv = ifp->if_softc;
> >     ifp0 = if_get(ifv->ifv_ifp0);
> > -   if (ifp0 == NULL || (ifp0->if_flags & (IFF_UP|IFF_RUNNING)) !=
> > -       (IFF_UP|IFF_RUNNING)) {
> > +   if (ifp0 == NULL || !ISSET(ifp0->if_flags, IFF_RUNNING)) {
> >             ifq_purge(ifq);
> >             goto leave;
> >     }
> >  
> > -   txprio = ifv->ifv_prio;
> > -
> >     while ((m = ifq_dequeue(ifq)) != NULL) {
> >  #if NBPFILTER > 0
> >             if (ifp->if_bpf)
> >                     bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
> >  #endif /* NBPFILTER > 0 */
> >  
> > -           prio = (txprio == IF_HDRPRIO_PACKET) ?
> > -               m->m_pkthdr.pf.prio : txprio;
> > -
> > -           /* IEEE 802.1p has prio 0 and 1 swapped */
> > -           if (prio <= 1)
> > -                   prio = !prio;
> > +           vtag = vlan_tag(ifv, m);
> >  
> >             /*
> >              * If this packet came from a pseudowire it means it already
> > @@ -285,23 +386,17 @@ vlan_start(struct ifqueue *ifq)
> >              */
> >             } else if ((ifp0->if_capabilities & IFCAP_VLAN_HWTAGGING) &&
> >                 (ifv->ifv_type == ETHERTYPE_VLAN)) {
> > -                   m->m_pkthdr.ether_vtag = ifv->ifv_tag +
> > -                       (prio << EVL_PRIO_BITS);
> > +                   m->m_pkthdr.ether_vtag = vtag;
> >                     m->m_flags |= M_VLANTAG;
> >             } else {
> > -                   m = vlan_inject(m, ifv->ifv_type, ifv->ifv_tag |
> > -                       (prio << EVL_PRIO_BITS));
> > +                   m = vlan_inject(m, ifv->ifv_type, vtag);
> >                     if (m == NULL) {
> > -                           ifp->if_oerrors++;
> > +                           ifq->ifq_oerrors++;
> >                             continue;
> >                     }
> >             }
> >  
> > -           if (if_enqueue(ifp0, m)) {
> > -                   ifp->if_oerrors++;
> > -                   ifq->ifq_errors++;
> > -                   continue;
> > -           }
> > +           if_enqueue(ifp0, m);
> >     }
> >  
> >  leave:
> > Index: net/ifq.h
> > ===================================================================
> > RCS file: /cvs/src/sys/net/ifq.h,v
> > retrieving revision 1.20
> > diff -u -p -r1.20 ifq.h
> > --- net/ifq.h       4 Jan 2018 11:02:57 -0000       1.20
> > +++ net/ifq.h       30 Nov 2018 02:24:09 -0000
> > @@ -421,6 +422,7 @@ void             ifq_barrier(struct ifqueue *);
> >  #define    ifq_len(_ifq)                   ((_ifq)->ifq_len)
> >  #define    ifq_empty(_ifq)                 (ifq_len(_ifq) == 0)
> >  #define    ifq_set_maxlen(_ifq, _l)        ((_ifq)->ifq_maxlen = (_l))
> > +#define    ifq_is_priq(_ifq)               ((_ifq)->ifq_ops == 
> > ifq_priq_ops)
> >  
> >  static inline void
> >  ifq_set_oactive(struct ifqueue *ifq)
> > Index: netinet/if_ether.h
> > ===================================================================
> > RCS file: /cvs/src/sys/netinet/if_ether.h,v
> > retrieving revision 1.73
> > diff -u -p -r1.73 if_ether.h
> > --- netinet/if_ether.h      29 Nov 2016 10:09:57 -0000      1.73
> > +++ netinet/if_ether.h      30 Nov 2018 02:24:09 -0000
> > @@ -92,6 +92,11 @@ struct  ether_vlan_header {
> >          u_int16_t evl_proto;
> >  };
> >  
> > +struct ether_vlan_shim {
> > +   u_int16_t evl_tag;
> > +   u_int16_t evl_proto;
> > +};
> > +
> >  #define EVL_VLID_MASK      0xFFF
> >  #define EVL_VLID_NULL      0x000
> >  /* 0x000 and 0xfff are reserved */
> > Index: net/if_var.h
> > ===================================================================
> > RCS file: /cvs/src/sys/net/if_var.h,v
> > retrieving revision 1.90
> > diff -u -p -r1.90 if_var.h
> > --- net/if_var.h    10 Sep 2018 16:18:34 -0000      1.90
> > +++ net/if_var.h    30 Nov 2018 02:24:09 -0000
> > @@ -76,6 +76,7 @@
> >  struct rtentry;
> >  struct ifnet;
> >  struct task;
> > +struct cpumem;
> >  
> >  /*
> >   * Structure describing a `cloning' interface.
> > @@ -144,6 +145,7 @@ struct ifnet {                          /* and the 
> > entries */
> >     unsigned short if_flags;        /* [N] up/down, broadcast, etc. */
> >     int     if_xflags;              /* [N] extra softnet flags */
> >     struct  if_data if_data;        /* stats and other data about if */
> > +   struct  cpumem *if_counters;    /* per cpu stats */
> >     uint32_t if_hardmtu;            /* [d] maximum MTU device supports */
> >     char    if_description[IFDESCRSIZE]; /* [c] interface description */
> >     u_short if_rtlabelid;           /* [c] next route label */
> > @@ -202,6 +204,23 @@ struct ifnet {                         /* and the 
> > entries */
> >  #define    if_capabilities if_data.ifi_capabilities
> >  #define    if_rdomain      if_data.ifi_rdomain
> >  
> > +enum if_counters {
> > +   ifc_ipackets,           /* packets received on interface */
> > +   ifc_ierrors,            /* input errors on interface */
> > +   ifc_opackets,           /* packets sent on interface */
> > +   ifc_oerrors,            /* output errors on interface */
> > +   ifc_collisions,         /* collisions on csma interfaces */
> > +   ifc_ibytes,             /* total number of octets received */
> > +   ifc_obytes,             /* total number of octets sent */
> > +   ifc_imcasts,            /* packets received via multicast */
> > +   ifc_omcasts,            /* packets sent via multicast */
> > +   ifc_iqdrops,            /* dropped on input, this interface */
> > +   ifc_oqdrops,            /* dropped on output, this interface */
> > +   ifc_noproto,            /* destined for unsupported protocol */
> > +
> > +   ifc_ncounters
> > +};
> > +
> >  /*
> >   * The ifaddr structure contains information about one address
> >   * of an interface.  They are maintained by the different address families,
> > @@ -356,6 +375,9 @@ u_int   if_rxr_get(struct if_rxring *, u_i
> >  int        if_rxr_info_ioctl(struct if_rxrinfo *, u_int, struct 
> > if_rxring_info *);
> >  int        if_rxr_ioctl(struct if_rxrinfo *, const char *, u_int,
> >         struct if_rxring *);
> > +
> > +void       if_counters_alloc(struct ifnet *);
> > +void       if_counters_free(struct ifnet *);
> >  
> >  #endif /* _KERNEL */
> >  
> > Index: net/if.c
> > ===================================================================
> > RCS file: /cvs/src/sys/net/if.c,v
> > retrieving revision 1.568
> > diff -u -p -r1.568 if.c
> > --- net/if.c        29 Nov 2018 00:11:49 -0000      1.568
> > +++ net/if.c        30 Nov 2018 02:24:09 -0000
> > @@ -84,6 +84,7 @@
> >  #include <sys/domain.h>
> >  #include <sys/task.h>
> >  #include <sys/atomic.h>
> > +#include <sys/percpu.h>
> >  #include <sys/proc.h>
> >  
> >  #include <dev/rndvar.h>
> > @@ -1103,6 +1104,9 @@ if_detach(struct ifnet *ifp)
> >     splx(s);
> >     NET_UNLOCK();
> >  
> > +   if (ifp->if_counters != NULL)
> > +           if_counters_free(ifp);
> > +
> >     for (i = 0; i < ifp->if_nifqs; i++)
> >             ifq_destroy(ifp->if_ifqs[i]);
> >     if (ifp->if_ifqs != ifp->if_snd.ifq_ifqs) {
> > @@ -2362,11 +2366,47 @@ ifconf(caddr_t data)
> >  }
> >  
> >  void
> > +if_counters_alloc(struct ifnet *ifp)
> > +{
> > +   KASSERT(ifp->if_counters == NULL);
> > +
> > +   ifp->if_counters = counters_alloc(ifc_ncounters);
> > +}
> > +
> > +void
> > +if_counters_free(struct ifnet *ifp)
> > +{
> > +   KASSERT(ifp->if_counters != NULL);
> > +
> > +   counters_free(ifp->if_counters, ifc_ncounters);
> > +   ifp->if_counters = NULL;
> > +}
> > +
> > +void
> >  if_getdata(struct ifnet *ifp, struct if_data *data)
> >  {
> >     unsigned int i;
> >  
> >     *data = ifp->if_data;
> > +
> > +   if (ifp->if_counters != NULL) {
> > +           uint64_t counters[ifc_ncounters];
> > +
> > +           counters_read(ifp->if_counters, counters, nitems(counters));
> > +
> > +           data->ifi_ipackets += counters[ifc_ipackets];
> > +           data->ifi_ierrors += counters[ifc_ierrors];
> > +           data->ifi_opackets += counters[ifc_opackets];
> > +           data->ifi_oerrors += counters[ifc_oerrors];
> > +           data->ifi_collisions += counters[ifc_collisions];
> > +           data->ifi_ibytes += counters[ifc_ibytes];
> > +           data->ifi_obytes += counters[ifc_obytes];
> > +           data->ifi_imcasts += counters[ifc_imcasts];
> > +           data->ifi_omcasts += counters[ifc_omcasts];
> > +           data->ifi_iqdrops += counters[ifc_iqdrops];
> > +           data->ifi_oqdrops += counters[ifc_oqdrops];
> > +           data->ifi_noproto += counters[ifc_noproto];
> > +   }
> >  
> >     for (i = 0; i < ifp->if_nifqs; i++) {
> >             struct ifqueue *ifq = ifp->if_ifqs[i];
> > 
> 

-- 
:wq Claudio

Reply via email to