Re: split ether_output up into resolution, encapsulation, and output
On Fri, Nov 30, 2018 at 12:21:11PM +1000, David Gwynne wrote: > i have a plan to allow virtual interfaces (eg, vlan, etherip, etc) to > provide their own output functions so they can bypass the ifq machinery > and push the packet onto the underlying layer directly. > > they'll still need to get an ethernet header though. vlan needs to get > the ethernet header and put the vlan shim into it, therefore > ether_resolve is exposed. etherip doesnt need a shim, it just wants > ethernet encapsulating the payload before adding its own headers to the > packet, therefore there is ether_encap. > > does this make sense? > > ok? Two comments below. Apart from that OK claudio@ > Index: net/if_ethersubr.c > === > RCS file: /cvs/src/sys/net/if_ethersubr.c,v > retrieving revision 1.253 > diff -u -p -r1.253 if_ethersubr.c > --- net/if_ethersubr.c13 Mar 2018 01:31:48 - 1.253 > +++ net/if_ethersubr.c30 Nov 2018 02:02:58 - > @@ -178,24 +178,18 @@ ether_rtrequest(struct ifnet *ifp, int r > break; > } > } > -/* > - * Ethernet output routine. > - * Encapsulate a packet of type family for the local net. > - * Assumes that ifp is actually pointer to arpcom structure. > - */ > + > int > -ether_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, > -struct rtentry *rt) > +ether_resolve(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, > +struct rtentry *rt, struct ether_header *eh) > { > - u_int16_t etype; > - u_char edst[ETHER_ADDR_LEN]; > - u_char *esrc; > - struct mbuf *mcopy = NULL; > - struct ether_header *eh; > struct arpcom *ac = (struct arpcom *)ifp; > sa_family_t af = dst->sa_family; > int error = 0; > > + if (!ISSET(ifp->if_flags, IFF_RUNNING)) > + senderr(ENETDOWN); > + > KASSERT(rt != NULL || ISSET(m->m_flags, M_MCAST|M_BCAST) || > af == AF_UNSPEC || af == pseudo_AF_HDRCMPLT); > > @@ -207,28 +201,31 @@ ether_output(struct ifnet *ifp, struct m > } > #endif > > - esrc = ac->ac_enaddr; > - > - if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) > - senderr(ENETDOWN); > - > switch (af) { > case AF_INET: > - error = arpresolve(ifp, rt, m, dst, edst); > + error = arpresolve(ifp, rt, m, dst, eh->ether_dhost); > if (error) > - return (error == EAGAIN ? 0 : error); > + return (error); > + eh->ether_type = htons(ETHERTYPE_IP); > + > /* If broadcasting on a simplex interface, loopback a copy */ > - if ((m->m_flags & M_BCAST) && (ifp->if_flags & IFF_SIMPLEX) && > - !m->m_pkthdr.pf.routed) > + if (ISSET(m->m_flags, M_BCAST) && > + ISSET(ifp->if_flags, IFF_SIMPLEX) && > + !m->m_pkthdr.pf.routed) { > + struct mbuf *mcopy; > + > + /* XXX Should we input an unencrypted IPsec packet? */ > mcopy = m_copym(m, 0, M_COPYALL, M_NOWAIT); > - etype = htons(ETHERTYPE_IP); > + if (mcopy != NULL) > + if_input_local(ifp, mcopy, af); > + } > break; > #ifdef INET6 > case AF_INET6: > - error = nd6_resolve(ifp, rt, m, dst, edst); > + error = nd6_resolve(ifp, rt, m, dst, eh->ether_dhost); > if (error) > - return (error == EAGAIN ? 0 : error); > - etype = htons(ETHERTYPE_IPV6); > + return (error); > + eh->ether_type = htons(ETHERTYPE_IPV6); > break; > #endif > #ifdef MPLS > @@ -242,72 +239,102 @@ ether_output(struct ifnet *ifp, struct m > senderr(ENETUNREACH); > > switch (dst->sa_family) { > - case AF_LINK: > - if (satosdl(dst)->sdl_alen < sizeof(edst)) > - senderr(EHOSTUNREACH); > - memcpy(edst, LLADDR(satosdl(dst)), > - sizeof(edst)); > - break; > + case AF_LINK: > + if (satosdl(dst)->sdl_alen < sizeof(eh->ether_dhost)) > + senderr(EHOSTUNREACH); > + memcpy(eh->ether_dhost, LLADDR(satosdl(dst)), > + sizeof(eh->ether_dhost)); > + break; > #ifdef INET6 > - case AF_INET6: > - error = nd6_resolve(ifp, rt, m, dst, edst); > - if (error) > - return (error == EAGAIN ? 0 : error); > - break; > + case AF_INET6: > + error = nd6_resolve(ifp, rt, m,
Re: split ether_output up into resolution, encapsulation, and output
On Fri, Nov 30, 2018 at 02:04:40PM -0200, Martin Pieuchot wrote: > On 30/11/18(Fri) 12:35, David Gwynne wrote: > > On Fri, Nov 30, 2018 at 12:21:11PM +1000, David Gwynne wrote: > > > i have a plan to allow virtual interfaces (eg, vlan, etherip, etc) to > > > provide their own output functions so they can bypass the ifq machinery > > > and push the packet onto the underlying layer directly. > > > > > > they'll still need to get an ethernet header though. vlan needs to get > > > the ethernet header and put the vlan shim into it, therefore > > > ether_resolve is exposed. etherip doesnt need a shim, it just wants > > > ethernet encapsulating the payload before adding its own headers to the > > > packet, therefore there is ether_encap. > > > > > > does this make sense? > > > > > > ok? > > > > this shows vlan and etherip using the new functions. > > You're still using ifq_enqueue() and ifq_dequeue(). So, I'm not sure to > understand what is the gain of this change. If I understood the change correctly it will bypass queueing when no HFSC queueing is configured on that interface. If there is no traffic shaping in use the ifp_output function will directly call the next ifp_output function or in the case of etherip / gre /gif it will call ip_send. IMO this is a step in the right direction. > > Index: net/if_etherip.c > > === > > RCS file: /cvs/src/sys/net/if_etherip.c,v > > retrieving revision 1.40 > > diff -u -p -r1.40 if_etherip.c > > --- net/if_etherip.c12 Nov 2018 23:57:06 - 1.40 > > +++ net/if_etherip.c30 Nov 2018 02:24:09 - > > @@ -102,7 +102,10 @@ void etheripattach(int); > > int etherip_clone_create(struct if_clone *, int); > > int etherip_clone_destroy(struct ifnet *); > > int etherip_ioctl(struct ifnet *, u_long, caddr_t); > > -void etherip_start(struct ifnet *); > > +int etherip_output(struct ifnet *, struct mbuf *, struct sockaddr *, > > +struct rtentry *rt); > > +void etherip_start(struct ifqueue *); > > +void etherip_send(struct ifnet *, struct mbuf *); > > int etherip_media_change(struct ifnet *); > > void etherip_media_status(struct ifnet *, struct ifmediareq *); > > int etherip_set_tunnel(struct etherip_softc *, struct if_laddrreq *); > > @@ -144,9 +147,10 @@ etherip_clone_create(struct if_clone *if > > ifp->if_softc = sc; > > ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN; > > ifp->if_ioctl = etherip_ioctl; > > - ifp->if_start = etherip_start; > > + ifp->if_output = etherip_output; > > + ifp->if_qstart = etherip_start; > > ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; > > - ifp->if_xflags = IFXF_CLONED; > > + ifp->if_xflags = IFXF_MPSAFE | IFXF_CLONED; > > IFQ_SET_MAXLEN(>if_snd, IFQ_MAXLEN); > > ifp->if_capabilities = IFCAP_VLAN_MTU; > > ether_fakeaddr(ifp); > > @@ -201,40 +205,63 @@ etherip_media_status(struct ifnet *ifp, > > } > > > > void > > -etherip_start(struct ifnet *ifp) > > +etherip_send(struct ifnet *ifp, struct mbuf *m) > > { > > struct etherip_softc *sc = ifp->if_softc; > > - struct mbuf *m; > > int error; > > -#if NBPFILTER > 0 > > - caddr_t if_bpf; > > -#endif > > > > - while ((m = ifq_dequeue(>if_snd)) != NULL) { > > #if NBPFILTER > 0 > > - if_bpf = ifp->if_bpf; > > - if (if_bpf) > > - bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT); > > + caddr_t if_bpf = ifp->if_bpf; > > + if (if_bpf) > > + bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT); > > #endif > > > > - switch (sc->sc_tunnel.t_af) { > > - case AF_INET: > > - error = ip_etherip_output(ifp, m); > > - break; > > + switch (sc->sc_tunnel.t_af) { > > + case AF_INET: > > + error = ip_etherip_output(ifp, m); > > + break; > > #ifdef INET6 > > - case AF_INET6: > > - error = ip6_etherip_output(ifp, m); > > - break; > > + case AF_INET6: > > + error = ip6_etherip_output(ifp, m); > > + break; > > #endif > > - default: > > - /* unhandled_af(sc->sc_tunnel.t_af); */ > > - m_freem(m); > > - continue; > > - } > > + default: > > + /* unhandled_af(sc->sc_tunnel.t_af); */ > > + m_freem(m); > > + error = ENETDOWN; > > + break; > > + } > > > > - if (error) > > - ifp->if_oerrors++; > > + if (error) > > + ifp->if_oerrors++; > > +} > > + > > +void > > +etherip_start(struct ifqueue *ifq) > > +{ > > + struct ifnet *ifp = ifq->ifq_if; > > + struct mbuf *m; > > + > > + while ((m = ifq_dequeue(ifq)) != NULL) > > + etherip_send(ifp, m); > > +} > > + > > +int > > +etherip_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, > > +struct rtentry *rt) > > +{ > > + int error; > > + > > + m =
Re: split ether_output up into resolution, encapsulation, and output
On 30/11/18(Fri) 12:35, David Gwynne wrote: > On Fri, Nov 30, 2018 at 12:21:11PM +1000, David Gwynne wrote: > > i have a plan to allow virtual interfaces (eg, vlan, etherip, etc) to > > provide their own output functions so they can bypass the ifq machinery > > and push the packet onto the underlying layer directly. > > > > they'll still need to get an ethernet header though. vlan needs to get > > the ethernet header and put the vlan shim into it, therefore > > ether_resolve is exposed. etherip doesnt need a shim, it just wants > > ethernet encapsulating the payload before adding its own headers to the > > packet, therefore there is ether_encap. > > > > does this make sense? > > > > ok? > > this shows vlan and etherip using the new functions. You're still using ifq_enqueue() and ifq_dequeue(). So, I'm not sure to understand what is the gain of this change. > Index: net/if_etherip.c > === > RCS file: /cvs/src/sys/net/if_etherip.c,v > retrieving revision 1.40 > diff -u -p -r1.40 if_etherip.c > --- net/if_etherip.c 12 Nov 2018 23:57:06 - 1.40 > +++ net/if_etherip.c 30 Nov 2018 02:24:09 - > @@ -102,7 +102,10 @@ void etheripattach(int); > int etherip_clone_create(struct if_clone *, int); > int etherip_clone_destroy(struct ifnet *); > int etherip_ioctl(struct ifnet *, u_long, caddr_t); > -void etherip_start(struct ifnet *); > +int etherip_output(struct ifnet *, struct mbuf *, struct sockaddr *, > +struct rtentry *rt); > +void etherip_start(struct ifqueue *); > +void etherip_send(struct ifnet *, struct mbuf *); > int etherip_media_change(struct ifnet *); > void etherip_media_status(struct ifnet *, struct ifmediareq *); > int etherip_set_tunnel(struct etherip_softc *, struct if_laddrreq *); > @@ -144,9 +147,10 @@ etherip_clone_create(struct if_clone *if > ifp->if_softc = sc; > ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN; > ifp->if_ioctl = etherip_ioctl; > - ifp->if_start = etherip_start; > + ifp->if_output = etherip_output; > + ifp->if_qstart = etherip_start; > ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; > - ifp->if_xflags = IFXF_CLONED; > + ifp->if_xflags = IFXF_MPSAFE | IFXF_CLONED; > IFQ_SET_MAXLEN(>if_snd, IFQ_MAXLEN); > ifp->if_capabilities = IFCAP_VLAN_MTU; > ether_fakeaddr(ifp); > @@ -201,40 +205,63 @@ etherip_media_status(struct ifnet *ifp, > } > > void > -etherip_start(struct ifnet *ifp) > +etherip_send(struct ifnet *ifp, struct mbuf *m) > { > struct etherip_softc *sc = ifp->if_softc; > - struct mbuf *m; > int error; > -#if NBPFILTER > 0 > - caddr_t if_bpf; > -#endif > > - while ((m = ifq_dequeue(>if_snd)) != NULL) { > #if NBPFILTER > 0 > - if_bpf = ifp->if_bpf; > - if (if_bpf) > - bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT); > + caddr_t if_bpf = ifp->if_bpf; > + if (if_bpf) > + bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT); > #endif > > - switch (sc->sc_tunnel.t_af) { > - case AF_INET: > - error = ip_etherip_output(ifp, m); > - break; > + switch (sc->sc_tunnel.t_af) { > + case AF_INET: > + error = ip_etherip_output(ifp, m); > + break; > #ifdef INET6 > - case AF_INET6: > - error = ip6_etherip_output(ifp, m); > - break; > + case AF_INET6: > + error = ip6_etherip_output(ifp, m); > + break; > #endif > - default: > - /* unhandled_af(sc->sc_tunnel.t_af); */ > - m_freem(m); > - continue; > - } > + default: > + /* unhandled_af(sc->sc_tunnel.t_af); */ > + m_freem(m); > + error = ENETDOWN; > + break; > + } > > - if (error) > - ifp->if_oerrors++; > + if (error) > + ifp->if_oerrors++; > +} > + > +void > +etherip_start(struct ifqueue *ifq) > +{ > + struct ifnet *ifp = ifq->ifq_if; > + struct mbuf *m; > + > + while ((m = ifq_dequeue(ifq)) != NULL) > + etherip_send(ifp, m); > +} > + > +int > +etherip_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, > +struct rtentry *rt) > +{ > + int error; > + > + m = ether_encap(ifp, m, dst, rt, ); > + if (m == NULL) > + return (error); > + > + if (ifq_is_priq(>if_snd)) { > + etherip_send(ifp, m); > + return (0); > } > + > + return (ifq_enqueue(>if_snd, m)); > } > > int > Index: net/if_ethersubr.c > === > RCS file: /cvs/src/sys/net/if_ethersubr.c,v > retrieving revision 1.253 > diff -u -p -r1.253 if_ethersubr.c > --- net/if_ethersubr.c13 Mar 2018 01:31:48 - 1.253 > +++
Re: split ether_output up into resolution, encapsulation, and output
On Fri, Nov 30, 2018 at 12:21:11PM +1000, David Gwynne wrote: > i have a plan to allow virtual interfaces (eg, vlan, etherip, etc) to > provide their own output functions so they can bypass the ifq machinery > and push the packet onto the underlying layer directly. > > they'll still need to get an ethernet header though. vlan needs to get > the ethernet header and put the vlan shim into it, therefore > ether_resolve is exposed. etherip doesnt need a shim, it just wants > ethernet encapsulating the payload before adding its own headers to the > packet, therefore there is ether_encap. > > does this make sense? > > ok? this shows vlan and etherip using the new functions. Index: net/if_etherip.c === RCS file: /cvs/src/sys/net/if_etherip.c,v retrieving revision 1.40 diff -u -p -r1.40 if_etherip.c --- net/if_etherip.c12 Nov 2018 23:57:06 - 1.40 +++ net/if_etherip.c30 Nov 2018 02:24:09 - @@ -102,7 +102,10 @@ void etheripattach(int); int etherip_clone_create(struct if_clone *, int); int etherip_clone_destroy(struct ifnet *); int etherip_ioctl(struct ifnet *, u_long, caddr_t); -void etherip_start(struct ifnet *); +int etherip_output(struct ifnet *, struct mbuf *, struct sockaddr *, +struct rtentry *rt); +void etherip_start(struct ifqueue *); +void etherip_send(struct ifnet *, struct mbuf *); int etherip_media_change(struct ifnet *); void etherip_media_status(struct ifnet *, struct ifmediareq *); int etherip_set_tunnel(struct etherip_softc *, struct if_laddrreq *); @@ -144,9 +147,10 @@ etherip_clone_create(struct if_clone *if ifp->if_softc = sc; ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN; ifp->if_ioctl = etherip_ioctl; - ifp->if_start = etherip_start; + ifp->if_output = etherip_output; + ifp->if_qstart = etherip_start; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; - ifp->if_xflags = IFXF_CLONED; + ifp->if_xflags = IFXF_MPSAFE | IFXF_CLONED; IFQ_SET_MAXLEN(>if_snd, IFQ_MAXLEN); ifp->if_capabilities = IFCAP_VLAN_MTU; ether_fakeaddr(ifp); @@ -201,40 +205,63 @@ etherip_media_status(struct ifnet *ifp, } void -etherip_start(struct ifnet *ifp) +etherip_send(struct ifnet *ifp, struct mbuf *m) { struct etherip_softc *sc = ifp->if_softc; - struct mbuf *m; int error; -#if NBPFILTER > 0 - caddr_t if_bpf; -#endif - while ((m = ifq_dequeue(>if_snd)) != NULL) { #if NBPFILTER > 0 - if_bpf = ifp->if_bpf; - if (if_bpf) - bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT); + caddr_t if_bpf = ifp->if_bpf; + if (if_bpf) + bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT); #endif - switch (sc->sc_tunnel.t_af) { - case AF_INET: - error = ip_etherip_output(ifp, m); - break; + switch (sc->sc_tunnel.t_af) { + case AF_INET: + error = ip_etherip_output(ifp, m); + break; #ifdef INET6 - case AF_INET6: - error = ip6_etherip_output(ifp, m); - break; + case AF_INET6: + error = ip6_etherip_output(ifp, m); + break; #endif - default: - /* unhandled_af(sc->sc_tunnel.t_af); */ - m_freem(m); - continue; - } + default: + /* unhandled_af(sc->sc_tunnel.t_af); */ + m_freem(m); + error = ENETDOWN; + break; + } - if (error) - ifp->if_oerrors++; + if (error) + ifp->if_oerrors++; +} + +void +etherip_start(struct ifqueue *ifq) +{ + struct ifnet *ifp = ifq->ifq_if; + struct mbuf *m; + + while ((m = ifq_dequeue(ifq)) != NULL) + etherip_send(ifp, m); +} + +int +etherip_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, +struct rtentry *rt) +{ + int error; + + m = ether_encap(ifp, m, dst, rt, ); + if (m == NULL) + return (error); + + if (ifq_is_priq(>if_snd)) { + etherip_send(ifp, m); + return (0); } + + return (ifq_enqueue(>if_snd, m)); } int Index: net/if_ethersubr.c === RCS file: /cvs/src/sys/net/if_ethersubr.c,v retrieving revision 1.253 diff -u -p -r1.253 if_ethersubr.c --- net/if_ethersubr.c 13 Mar 2018 01:31:48 - 1.253 +++ net/if_ethersubr.c 30 Nov 2018 02:24:09 - @@ -483,7 +510,8 @@ ether_ifattach(struct ifnet *ifp) ifp->if_addrlen = ETHER_ADDR_LEN; ifp->if_hdrlen = ETHER_HDR_LEN; ifp->if_mtu = ETHERMTU; - ifp->if_output = ether_output; + if (ifp->if_output == NULL) +