On Fri, Nov 30, 2018 at 02:04:40PM -0200, Martin Pieuchot wrote: > On 30/11/18(Fri) 12:35, David Gwynne wrote: > > On Fri, Nov 30, 2018 at 12:21:11PM +1000, David Gwynne wrote: > > > i have a plan to allow virtual interfaces (eg, vlan, etherip, etc) to > > > provide their own output functions so they can bypass the ifq machinery > > > and push the packet onto the underlying layer directly. > > > > > > they'll still need to get an ethernet header though. vlan needs to get > > > the ethernet header and put the vlan shim into it, therefore > > > ether_resolve is exposed. etherip doesnt need a shim, it just wants > > > ethernet encapsulating the payload before adding its own headers to the > > > packet, therefore there is ether_encap. > > > > > > does this make sense? > > > > > > ok? > > > > this shows vlan and etherip using the new functions. > > You're still using ifq_enqueue() and ifq_dequeue(). So, I'm not sure to > understand what is the gain of this change.
If I understood the change correctly it will bypass queueing when no HFSC queueing is configured on that interface. If there is no traffic shaping in use the ifp_output function will directly call the next ifp_output function or in the case of etherip / gre /gif it will call ip_send. IMO this is a step in the right direction. > > Index: net/if_etherip.c > > =================================================================== > > RCS file: /cvs/src/sys/net/if_etherip.c,v > > retrieving revision 1.40 > > diff -u -p -r1.40 if_etherip.c > > --- net/if_etherip.c 12 Nov 2018 23:57:06 -0000 1.40 > > +++ net/if_etherip.c 30 Nov 2018 02:24:09 -0000 > > @@ -102,7 +102,10 @@ void etheripattach(int); > > int etherip_clone_create(struct if_clone *, int); > > int etherip_clone_destroy(struct ifnet *); > > int etherip_ioctl(struct ifnet *, u_long, caddr_t); > > -void etherip_start(struct ifnet *); > > +int etherip_output(struct ifnet *, struct mbuf *, struct sockaddr *, > > + struct rtentry *rt); > > +void etherip_start(struct ifqueue *); > > +void etherip_send(struct ifnet *, struct mbuf *); > > int etherip_media_change(struct ifnet *); > > void etherip_media_status(struct ifnet *, struct ifmediareq *); > > int etherip_set_tunnel(struct etherip_softc *, struct if_laddrreq *); > > @@ -144,9 +147,10 @@ etherip_clone_create(struct if_clone *if > > ifp->if_softc = sc; > > ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN; > > ifp->if_ioctl = etherip_ioctl; > > - ifp->if_start = etherip_start; > > + ifp->if_output = etherip_output; > > + ifp->if_qstart = etherip_start; > > ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; > > - ifp->if_xflags = IFXF_CLONED; > > + ifp->if_xflags = IFXF_MPSAFE | IFXF_CLONED; > > IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); > > ifp->if_capabilities = IFCAP_VLAN_MTU; > > ether_fakeaddr(ifp); > > @@ -201,40 +205,63 @@ etherip_media_status(struct ifnet *ifp, > > } > > > > void > > -etherip_start(struct ifnet *ifp) > > +etherip_send(struct ifnet *ifp, struct mbuf *m) > > { > > struct etherip_softc *sc = ifp->if_softc; > > - struct mbuf *m; > > int error; > > -#if NBPFILTER > 0 > > - caddr_t if_bpf; > > -#endif > > > > - while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) { > > #if NBPFILTER > 0 > > - if_bpf = ifp->if_bpf; > > - if (if_bpf) > > - bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT); > > + caddr_t if_bpf = ifp->if_bpf; > > + if (if_bpf) > > + bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT); > > #endif > > > > - switch (sc->sc_tunnel.t_af) { > > - case AF_INET: > > - error = ip_etherip_output(ifp, m); > > - break; > > + switch (sc->sc_tunnel.t_af) { > > + case AF_INET: > > + error = ip_etherip_output(ifp, m); > > + break; > > #ifdef INET6 > > - case AF_INET6: > > - error = ip6_etherip_output(ifp, m); > > - break; > > + case AF_INET6: > > + error = ip6_etherip_output(ifp, m); > > + break; > > #endif > > - default: > > - /* unhandled_af(sc->sc_tunnel.t_af); */ > > - m_freem(m); > > - continue; > > - } > > + default: > > + /* unhandled_af(sc->sc_tunnel.t_af); */ > > + m_freem(m); > > + error = ENETDOWN; > > + break; > > + } > > > > - if (error) > > - ifp->if_oerrors++; > > + if (error) > > + ifp->if_oerrors++; > > +} > > + > > +void > > +etherip_start(struct ifqueue *ifq) > > +{ > > + struct ifnet *ifp = ifq->ifq_if; > > + struct mbuf *m; > > + > > + while ((m = ifq_dequeue(ifq)) != NULL) > > + etherip_send(ifp, m); > > +} > > + > > +int > > +etherip_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, > > + struct rtentry *rt) > > +{ > > + int error; > > + > > + m = ether_encap(ifp, m, dst, rt, &error); > > + if (m == NULL) > > + return (error); > > + > > + if (ifq_is_priq(&ifp->if_snd)) { > > + etherip_send(ifp, m); > > + return (0); > > } > > + > > + return (ifq_enqueue(&ifp->if_snd, m)); > > } > > > > int > > Index: net/if_ethersubr.c > > =================================================================== > > RCS file: /cvs/src/sys/net/if_ethersubr.c,v > > retrieving revision 1.253 > > diff -u -p -r1.253 if_ethersubr.c > > --- net/if_ethersubr.c 13 Mar 2018 01:31:48 -0000 1.253 > > +++ net/if_ethersubr.c 30 Nov 2018 02:24:09 -0000 > > @@ -483,7 +510,8 @@ ether_ifattach(struct ifnet *ifp) > > ifp->if_addrlen = ETHER_ADDR_LEN; > > ifp->if_hdrlen = ETHER_HDR_LEN; > > ifp->if_mtu = ETHERMTU; > > - ifp->if_output = ether_output; > > + if (ifp->if_output == NULL) > > + ifp->if_output = ether_output; > > ifp->if_rtrequest = ether_rtrequest; > > > > if_ih_insert(ifp, ether_input, NULL); > > Index: net/if_vlan.c > > =================================================================== > > RCS file: /cvs/src/sys/net/if_vlan.c,v > > retrieving revision 1.179 > > diff -u -p -r1.179 if_vlan.c > > --- net/if_vlan.c 16 Nov 2018 08:43:08 -0000 1.179 > > +++ net/if_vlan.c 30 Nov 2018 02:24:09 -0000 > > @@ -58,6 +58,7 @@ > > #include <sys/sockio.h> > > #include <sys/systm.h> > > #include <sys/rwlock.h> > > +#include <sys/percpu.h> > > > > #include <net/if.h> > > #include <net/if_dl.h> > > @@ -85,6 +86,8 @@ int vlan_clone_create(struct if_clone *, > > int vlan_clone_destroy(struct ifnet *); > > > > int vlan_input(struct ifnet *, struct mbuf *, void *); > > +int vlan_output(struct ifnet *, struct mbuf *, struct sockaddr *, > > + struct rtentry *rt); > > void vlan_start(struct ifqueue *ifq); > > int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr); > > > > @@ -177,6 +180,7 @@ vlan_clone_create(struct if_clone *ifc, > > > > ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST; > > ifp->if_xflags = IFXF_CLONED|IFXF_MPSAFE; > > + ifp->if_output = vlan_output; > > ifp->if_qstart = vlan_start; > > ifp->if_ioctl = vlan_ioctl; > > ifp->if_hardmtu = 0xffff; > > @@ -185,6 +189,8 @@ vlan_clone_create(struct if_clone *ifc, > > ether_ifattach(ifp); > > ifp->if_hdrlen = EVL_ENCAPLEN; > > > > + if_counters_alloc(ifp); > > + > > return (0); > > } > > > > @@ -239,6 +245,110 @@ vlan_mplstunnel(int ifidx) > > #endif > > } > > > > +static uint16_t > > +vlan_tag(const struct ifvlan *ifv, const struct mbuf *m) > > +{ > > + int txprio = ifv->ifv_prio; > > + uint16_t prio; > > + > > + prio = (txprio == IF_HDRPRIO_PACKET) ? > > + m->m_pkthdr.pf.prio : txprio; > > + > > + /* IEEE 802.1p has prio 0 and 1 swapped */ > > + if (prio <= 1) > > + prio = !prio; > > + > > + return (ifv->if_prio | (prio << EVL_PRIO_BITS)); > > +} > > + > > +int > > +vlan_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, > > + struct rtentry *rt) > > +{ > > + struct ifvlan *ifv; > > + struct ifnet *ifp0; > > +#if NBPFILTER > 0 > > + caddr_t if_bpf; > > +#endif > > + struct sockaddr vdst; > > + struct ether_header *eh = (struct ether_header *)vdst.sa_data; > > + int txprio; > > + uint16_t vtag; > > + int error; > > + unsigned int bytes; > > + > > + if (!ifq_is_priq(&ifp->if_snd)) { > > + /* > > + * user wants to delay packets, which relies on the ifq > > + * machinery. fall back to if_enqueue via ether_output. > > + */ > > + return (ether_output(ifp, m, dst, rt)); > > + } > > + > > + error = ether_resolve(ifp, m, dst, rt, eh); > > + switch (error) { > > + case 0: > > + break; > > + case EAGAIN: > > + return (0); > > + default: > > + return (error); > > + } > > + > > + ifv = ifp->if_softc; > > + ifp0 = if_get(ifv->ifv_ifp0); > > + if (ifp0 == NULL || !ISSET(ifp0->if_flags, IFF_RUNNING)) { > > + m_freem(m); > > + error = ENETDOWN; > > + goto leave; > > + } > > + > > +#if NBPFILTER > 0 > > + if_bpf = ifp->if_bpf; > > + if (if_bpf) { > > + bpf_mtap_hdr(if_bpf, (caddr_t)eh, sizeof(*eh), m, > > + BPF_DIRECTION_OUT, NULL); > > + } > > +#endif > > + > > + vtag = vlan_tag(ifv, m); > > + > > + bytes = sizeof(*eh) + m->m_pkthdr.len; /* plus shim? */ > > + > > + /* > > + * If the underlying interface cannot do VLAN tag insertion > > + * itself, create an encapsulation header. > > + */ > > + if (ISSET(ifp0->if_capabilities, IFCAP_VLAN_HWTAGGING) && > > + ifv->ifv_type == ETHERTYPE_VLAN) { > > + m->m_pkthdr.ether_vtag = vtag; > > + m->m_flags |= M_VLANTAG; > > + } else { > > + struct ether_vlan_shim *evl; > > + > > + M_PREPEND(m, sizeof(*evl), M_DONTWAIT); > > + if (m == NULL) { > > + error = ENOBUFS; > > + goto leave; > > + } > > + > > + evl = mtod(m, struct ether_vlan_shim *); > > + evl->evl_tag = htons(vtag); > > + evl->evl_proto = eh->ether_type; > > + > > + eh->ether_type = htons(ifv->ifv_type); > > + } > > + > > + counters_pkt(ifp->if_counters, ifc_opackets, ifc_obytes, bytes); > > + > > + vdst.sa_family = pseudo_AF_HDRCMPLT; > > + error = ifp0->if_output(ifp0, m, &vdst, NULL); > > + > > +leave: > > + if_put(ifp0); > > + return (error); > > +} > > + > > void > > vlan_start(struct ifqueue *ifq) > > { > > @@ -246,31 +356,22 @@ vlan_start(struct ifqueue *ifq) > > struct ifvlan *ifv; > > struct ifnet *ifp0; > > struct mbuf *m; > > - int txprio; > > - uint8_t prio; > > + uint16_t vtag; > > > > ifv = ifp->if_softc; > > ifp0 = if_get(ifv->ifv_ifp0); > > - if (ifp0 == NULL || (ifp0->if_flags & (IFF_UP|IFF_RUNNING)) != > > - (IFF_UP|IFF_RUNNING)) { > > + if (ifp0 == NULL || !ISSET(ifp0->if_flags, IFF_RUNNING)) { > > ifq_purge(ifq); > > goto leave; > > } > > > > - txprio = ifv->ifv_prio; > > - > > while ((m = ifq_dequeue(ifq)) != NULL) { > > #if NBPFILTER > 0 > > if (ifp->if_bpf) > > bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT); > > #endif /* NBPFILTER > 0 */ > > > > - prio = (txprio == IF_HDRPRIO_PACKET) ? > > - m->m_pkthdr.pf.prio : txprio; > > - > > - /* IEEE 802.1p has prio 0 and 1 swapped */ > > - if (prio <= 1) > > - prio = !prio; > > + vtag = vlan_tag(ifv, m); > > > > /* > > * If this packet came from a pseudowire it means it already > > @@ -285,23 +386,17 @@ vlan_start(struct ifqueue *ifq) > > */ > > } else if ((ifp0->if_capabilities & IFCAP_VLAN_HWTAGGING) && > > (ifv->ifv_type == ETHERTYPE_VLAN)) { > > - m->m_pkthdr.ether_vtag = ifv->ifv_tag + > > - (prio << EVL_PRIO_BITS); > > + m->m_pkthdr.ether_vtag = vtag; > > m->m_flags |= M_VLANTAG; > > } else { > > - m = vlan_inject(m, ifv->ifv_type, ifv->ifv_tag | > > - (prio << EVL_PRIO_BITS)); > > + m = vlan_inject(m, ifv->ifv_type, vtag); > > if (m == NULL) { > > - ifp->if_oerrors++; > > + ifq->ifq_oerrors++; > > continue; > > } > > } > > > > - if (if_enqueue(ifp0, m)) { > > - ifp->if_oerrors++; > > - ifq->ifq_errors++; > > - continue; > > - } > > + if_enqueue(ifp0, m); > > } > > > > leave: > > Index: net/ifq.h > > =================================================================== > > RCS file: /cvs/src/sys/net/ifq.h,v > > retrieving revision 1.20 > > diff -u -p -r1.20 ifq.h > > --- net/ifq.h 4 Jan 2018 11:02:57 -0000 1.20 > > +++ net/ifq.h 30 Nov 2018 02:24:09 -0000 > > @@ -421,6 +422,7 @@ void ifq_barrier(struct ifqueue *); > > #define ifq_len(_ifq) ((_ifq)->ifq_len) > > #define ifq_empty(_ifq) (ifq_len(_ifq) == 0) > > #define ifq_set_maxlen(_ifq, _l) ((_ifq)->ifq_maxlen = (_l)) > > +#define ifq_is_priq(_ifq) ((_ifq)->ifq_ops == > > ifq_priq_ops) > > > > static inline void > > ifq_set_oactive(struct ifqueue *ifq) > > Index: netinet/if_ether.h > > =================================================================== > > RCS file: /cvs/src/sys/netinet/if_ether.h,v > > retrieving revision 1.73 > > diff -u -p -r1.73 if_ether.h > > --- netinet/if_ether.h 29 Nov 2016 10:09:57 -0000 1.73 > > +++ netinet/if_ether.h 30 Nov 2018 02:24:09 -0000 > > @@ -92,6 +92,11 @@ struct ether_vlan_header { > > u_int16_t evl_proto; > > }; > > > > +struct ether_vlan_shim { > > + u_int16_t evl_tag; > > + u_int16_t evl_proto; > > +}; > > + > > #define EVL_VLID_MASK 0xFFF > > #define EVL_VLID_NULL 0x000 > > /* 0x000 and 0xfff are reserved */ > > Index: net/if_var.h > > =================================================================== > > RCS file: /cvs/src/sys/net/if_var.h,v > > retrieving revision 1.90 > > diff -u -p -r1.90 if_var.h > > --- net/if_var.h 10 Sep 2018 16:18:34 -0000 1.90 > > +++ net/if_var.h 30 Nov 2018 02:24:09 -0000 > > @@ -76,6 +76,7 @@ > > struct rtentry; > > struct ifnet; > > struct task; > > +struct cpumem; > > > > /* > > * Structure describing a `cloning' interface. > > @@ -144,6 +145,7 @@ struct ifnet { /* and the > > entries */ > > unsigned short if_flags; /* [N] up/down, broadcast, etc. */ > > int if_xflags; /* [N] extra softnet flags */ > > struct if_data if_data; /* stats and other data about if */ > > + struct cpumem *if_counters; /* per cpu stats */ > > uint32_t if_hardmtu; /* [d] maximum MTU device supports */ > > char if_description[IFDESCRSIZE]; /* [c] interface description */ > > u_short if_rtlabelid; /* [c] next route label */ > > @@ -202,6 +204,23 @@ struct ifnet { /* and the > > entries */ > > #define if_capabilities if_data.ifi_capabilities > > #define if_rdomain if_data.ifi_rdomain > > > > +enum if_counters { > > + ifc_ipackets, /* packets received on interface */ > > + ifc_ierrors, /* input errors on interface */ > > + ifc_opackets, /* packets sent on interface */ > > + ifc_oerrors, /* output errors on interface */ > > + ifc_collisions, /* collisions on csma interfaces */ > > + ifc_ibytes, /* total number of octets received */ > > + ifc_obytes, /* total number of octets sent */ > > + ifc_imcasts, /* packets received via multicast */ > > + ifc_omcasts, /* packets sent via multicast */ > > + ifc_iqdrops, /* dropped on input, this interface */ > > + ifc_oqdrops, /* dropped on output, this interface */ > > + ifc_noproto, /* destined for unsupported protocol */ > > + > > + ifc_ncounters > > +}; > > + > > /* > > * The ifaddr structure contains information about one address > > * of an interface. They are maintained by the different address families, > > @@ -356,6 +375,9 @@ u_int if_rxr_get(struct if_rxring *, u_i > > int if_rxr_info_ioctl(struct if_rxrinfo *, u_int, struct > > if_rxring_info *); > > int if_rxr_ioctl(struct if_rxrinfo *, const char *, u_int, > > struct if_rxring *); > > + > > +void if_counters_alloc(struct ifnet *); > > +void if_counters_free(struct ifnet *); > > > > #endif /* _KERNEL */ > > > > Index: net/if.c > > =================================================================== > > RCS file: /cvs/src/sys/net/if.c,v > > retrieving revision 1.568 > > diff -u -p -r1.568 if.c > > --- net/if.c 29 Nov 2018 00:11:49 -0000 1.568 > > +++ net/if.c 30 Nov 2018 02:24:09 -0000 > > @@ -84,6 +84,7 @@ > > #include <sys/domain.h> > > #include <sys/task.h> > > #include <sys/atomic.h> > > +#include <sys/percpu.h> > > #include <sys/proc.h> > > > > #include <dev/rndvar.h> > > @@ -1103,6 +1104,9 @@ if_detach(struct ifnet *ifp) > > splx(s); > > NET_UNLOCK(); > > > > + if (ifp->if_counters != NULL) > > + if_counters_free(ifp); > > + > > for (i = 0; i < ifp->if_nifqs; i++) > > ifq_destroy(ifp->if_ifqs[i]); > > if (ifp->if_ifqs != ifp->if_snd.ifq_ifqs) { > > @@ -2362,11 +2366,47 @@ ifconf(caddr_t data) > > } > > > > void > > +if_counters_alloc(struct ifnet *ifp) > > +{ > > + KASSERT(ifp->if_counters == NULL); > > + > > + ifp->if_counters = counters_alloc(ifc_ncounters); > > +} > > + > > +void > > +if_counters_free(struct ifnet *ifp) > > +{ > > + KASSERT(ifp->if_counters != NULL); > > + > > + counters_free(ifp->if_counters, ifc_ncounters); > > + ifp->if_counters = NULL; > > +} > > + > > +void > > if_getdata(struct ifnet *ifp, struct if_data *data) > > { > > unsigned int i; > > > > *data = ifp->if_data; > > + > > + if (ifp->if_counters != NULL) { > > + uint64_t counters[ifc_ncounters]; > > + > > + counters_read(ifp->if_counters, counters, nitems(counters)); > > + > > + data->ifi_ipackets += counters[ifc_ipackets]; > > + data->ifi_ierrors += counters[ifc_ierrors]; > > + data->ifi_opackets += counters[ifc_opackets]; > > + data->ifi_oerrors += counters[ifc_oerrors]; > > + data->ifi_collisions += counters[ifc_collisions]; > > + data->ifi_ibytes += counters[ifc_ibytes]; > > + data->ifi_obytes += counters[ifc_obytes]; > > + data->ifi_imcasts += counters[ifc_imcasts]; > > + data->ifi_omcasts += counters[ifc_omcasts]; > > + data->ifi_iqdrops += counters[ifc_iqdrops]; > > + data->ifi_oqdrops += counters[ifc_oqdrops]; > > + data->ifi_noproto += counters[ifc_noproto]; > > + } > > > > for (i = 0; i < ifp->if_nifqs; i++) { > > struct ifqueue *ifq = ifp->if_ifqs[i]; > > > -- :wq Claudio