Re: split ether_output up into resolution, encapsulation, and output

2018-12-04 Thread Claudio Jeker
On Fri, Nov 30, 2018 at 12:21:11PM +1000, David Gwynne wrote:
> i have a plan to allow virtual interfaces (eg, vlan, etherip, etc) to
> provide their own output functions so they can bypass the ifq machinery
> and push the packet onto the underlying layer directly.
> 
> they'll still need to get an ethernet header though. vlan needs to get
> the ethernet header and put the vlan shim into it, therefore
> ether_resolve is exposed. etherip doesnt need a shim, it just wants
> ethernet encapsulating the payload before adding its own headers to the
> packet, therefore there is ether_encap.
> 
> does this make sense?
> 
> ok?

Two comments below. Apart from that OK claudio@

> Index: net/if_ethersubr.c
> ===
> RCS file: /cvs/src/sys/net/if_ethersubr.c,v
> retrieving revision 1.253
> diff -u -p -r1.253 if_ethersubr.c
> --- net/if_ethersubr.c13 Mar 2018 01:31:48 -  1.253
> +++ net/if_ethersubr.c30 Nov 2018 02:02:58 -
> @@ -178,24 +178,18 @@ ether_rtrequest(struct ifnet *ifp, int r
>   break;
>   }
>  }
> -/*
> - * Ethernet output routine.
> - * Encapsulate a packet of type family for the local net.
> - * Assumes that ifp is actually pointer to arpcom structure.
> - */
> +
>  int
> -ether_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
> -struct rtentry *rt)
> +ether_resolve(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
> +struct rtentry *rt, struct ether_header *eh)
>  {
> - u_int16_t etype;
> - u_char edst[ETHER_ADDR_LEN];
> - u_char *esrc;
> - struct mbuf *mcopy = NULL;
> - struct ether_header *eh;
>   struct arpcom *ac = (struct arpcom *)ifp;
>   sa_family_t af = dst->sa_family;
>   int error = 0;
>  
> + if (!ISSET(ifp->if_flags, IFF_RUNNING))
> + senderr(ENETDOWN);
> +
>   KASSERT(rt != NULL || ISSET(m->m_flags, M_MCAST|M_BCAST) ||
>   af == AF_UNSPEC || af == pseudo_AF_HDRCMPLT);
>  
> @@ -207,28 +201,31 @@ ether_output(struct ifnet *ifp, struct m
>   }
>  #endif
>  
> - esrc = ac->ac_enaddr;
> -
> - if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING))
> - senderr(ENETDOWN);
> -
>   switch (af) {
>   case AF_INET:
> - error = arpresolve(ifp, rt, m, dst, edst);
> + error = arpresolve(ifp, rt, m, dst, eh->ether_dhost);
>   if (error)
> - return (error == EAGAIN ? 0 : error);
> + return (error);
> + eh->ether_type = htons(ETHERTYPE_IP);
> +
>   /* If broadcasting on a simplex interface, loopback a copy */
> - if ((m->m_flags & M_BCAST) && (ifp->if_flags & IFF_SIMPLEX) &&
> - !m->m_pkthdr.pf.routed)
> + if (ISSET(m->m_flags, M_BCAST) &&
> + ISSET(ifp->if_flags, IFF_SIMPLEX) &&
> + !m->m_pkthdr.pf.routed) {
> + struct mbuf *mcopy;
> +
> + /* XXX Should we input an unencrypted IPsec packet? */
>   mcopy = m_copym(m, 0, M_COPYALL, M_NOWAIT);
> - etype = htons(ETHERTYPE_IP);
> + if (mcopy != NULL)
> + if_input_local(ifp, mcopy, af);
> + }
>   break;
>  #ifdef INET6
>   case AF_INET6:
> - error = nd6_resolve(ifp, rt, m, dst, edst);
> + error = nd6_resolve(ifp, rt, m, dst, eh->ether_dhost);
>   if (error)
> - return (error == EAGAIN ? 0 : error);
> - etype = htons(ETHERTYPE_IPV6);
> + return (error);
> + eh->ether_type = htons(ETHERTYPE_IPV6);
>   break;
>  #endif
>  #ifdef MPLS
> @@ -242,72 +239,102 @@ ether_output(struct ifnet *ifp, struct m
>   senderr(ENETUNREACH);
>  
>   switch (dst->sa_family) {
> - case AF_LINK:
> - if (satosdl(dst)->sdl_alen < sizeof(edst))
> - senderr(EHOSTUNREACH);
> - memcpy(edst, LLADDR(satosdl(dst)),
> - sizeof(edst));
> - break;
> + case AF_LINK:
> + if (satosdl(dst)->sdl_alen < sizeof(eh->ether_dhost))
> + senderr(EHOSTUNREACH);
> + memcpy(eh->ether_dhost, LLADDR(satosdl(dst)),
> + sizeof(eh->ether_dhost));
> + break;
>  #ifdef INET6
> - case AF_INET6:
> - error = nd6_resolve(ifp, rt, m, dst, edst);
> - if (error)
> - return (error == EAGAIN ? 0 : error);
> - break;
> + case AF_INET6:
> + error = nd6_resolve(ifp, rt, m, 

Re: split ether_output up into resolution, encapsulation, and output

2018-11-30 Thread Claudio Jeker
On Fri, Nov 30, 2018 at 02:04:40PM -0200, Martin Pieuchot wrote:
> On 30/11/18(Fri) 12:35, David Gwynne wrote:
> > On Fri, Nov 30, 2018 at 12:21:11PM +1000, David Gwynne wrote:
> > > i have a plan to allow virtual interfaces (eg, vlan, etherip, etc) to
> > > provide their own output functions so they can bypass the ifq machinery
> > > and push the packet onto the underlying layer directly.
> > > 
> > > they'll still need to get an ethernet header though. vlan needs to get
> > > the ethernet header and put the vlan shim into it, therefore
> > > ether_resolve is exposed. etherip doesnt need a shim, it just wants
> > > ethernet encapsulating the payload before adding its own headers to the
> > > packet, therefore there is ether_encap.
> > > 
> > > does this make sense?
> > > 
> > > ok?
> > 
> > this shows vlan and etherip using the new functions.
> 
> You're still using ifq_enqueue() and ifq_dequeue(). So, I'm not sure to
> understand what is the gain of this change.

If I understood the change correctly it will bypass queueing when no HFSC
queueing is configured on that interface. If there is no traffic shaping
in use the ifp_output function will directly call the next ifp_output
function or in the case of etherip / gre /gif it will call ip_send.

IMO this is a step in the right direction.
 
> > Index: net/if_etherip.c
> > ===
> > RCS file: /cvs/src/sys/net/if_etherip.c,v
> > retrieving revision 1.40
> > diff -u -p -r1.40 if_etherip.c
> > --- net/if_etherip.c12 Nov 2018 23:57:06 -  1.40
> > +++ net/if_etherip.c30 Nov 2018 02:24:09 -
> > @@ -102,7 +102,10 @@ void etheripattach(int);
> >  int etherip_clone_create(struct if_clone *, int);
> >  int etherip_clone_destroy(struct ifnet *);
> >  int etherip_ioctl(struct ifnet *, u_long, caddr_t);
> > -void etherip_start(struct ifnet *);
> > +int etherip_output(struct ifnet *, struct mbuf *, struct sockaddr *,
> > +struct rtentry *rt);
> > +void etherip_start(struct ifqueue *);
> > +void etherip_send(struct ifnet *, struct mbuf *);
> >  int etherip_media_change(struct ifnet *);
> >  void etherip_media_status(struct ifnet *, struct ifmediareq *);
> >  int etherip_set_tunnel(struct etherip_softc *, struct if_laddrreq *);
> > @@ -144,9 +147,10 @@ etherip_clone_create(struct if_clone *if
> > ifp->if_softc = sc;
> > ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
> > ifp->if_ioctl = etherip_ioctl;
> > -   ifp->if_start = etherip_start;
> > +   ifp->if_output = etherip_output;
> > +   ifp->if_qstart = etherip_start;
> > ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
> > -   ifp->if_xflags = IFXF_CLONED;
> > +   ifp->if_xflags = IFXF_MPSAFE | IFXF_CLONED;
> > IFQ_SET_MAXLEN(>if_snd, IFQ_MAXLEN);
> > ifp->if_capabilities = IFCAP_VLAN_MTU;
> > ether_fakeaddr(ifp);
> > @@ -201,40 +205,63 @@ etherip_media_status(struct ifnet *ifp, 
> >  }
> >  
> >  void
> > -etherip_start(struct ifnet *ifp)
> > +etherip_send(struct ifnet *ifp, struct mbuf *m)
> >  {
> > struct etherip_softc *sc = ifp->if_softc;
> > -   struct mbuf *m;
> > int error;
> > -#if NBPFILTER > 0
> > -   caddr_t if_bpf;
> > -#endif
> >  
> > -   while ((m = ifq_dequeue(>if_snd)) != NULL) {
> >  #if NBPFILTER > 0
> > -   if_bpf = ifp->if_bpf;
> > -   if (if_bpf)
> > -   bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
> > +   caddr_t if_bpf = ifp->if_bpf;
> > +   if (if_bpf)
> > +   bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
> >  #endif
> >  
> > -   switch (sc->sc_tunnel.t_af) {
> > -   case AF_INET:
> > -   error = ip_etherip_output(ifp, m);
> > -   break;
> > +   switch (sc->sc_tunnel.t_af) {
> > +   case AF_INET:
> > +   error = ip_etherip_output(ifp, m);
> > +   break;
> >  #ifdef INET6
> > -   case AF_INET6:
> > -   error = ip6_etherip_output(ifp, m);
> > -   break;
> > +   case AF_INET6:
> > +   error = ip6_etherip_output(ifp, m);
> > +   break;
> >  #endif
> > -   default:
> > -   /* unhandled_af(sc->sc_tunnel.t_af); */
> > -   m_freem(m);
> > -   continue;
> > -   }
> > +   default:
> > +   /* unhandled_af(sc->sc_tunnel.t_af); */
> > +   m_freem(m);
> > +   error = ENETDOWN;
> > +   break;
> > +   }
> >  
> > -   if (error)
> > -   ifp->if_oerrors++;
> > +   if (error)
> > +   ifp->if_oerrors++;
> > +}
> > +
> > +void
> > +etherip_start(struct ifqueue *ifq)
> > +{
> > +   struct ifnet *ifp = ifq->ifq_if;
> > +   struct mbuf *m;
> > +
> > +   while ((m = ifq_dequeue(ifq)) != NULL)
> > +   etherip_send(ifp, m);
> > +}
> > +
> > +int
> > +etherip_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
> > +struct rtentry *rt)
> > +{
> > +   int error;
> > +
> > +   m = 

Re: split ether_output up into resolution, encapsulation, and output

2018-11-30 Thread Martin Pieuchot
On 30/11/18(Fri) 12:35, David Gwynne wrote:
> On Fri, Nov 30, 2018 at 12:21:11PM +1000, David Gwynne wrote:
> > i have a plan to allow virtual interfaces (eg, vlan, etherip, etc) to
> > provide their own output functions so they can bypass the ifq machinery
> > and push the packet onto the underlying layer directly.
> > 
> > they'll still need to get an ethernet header though. vlan needs to get
> > the ethernet header and put the vlan shim into it, therefore
> > ether_resolve is exposed. etherip doesnt need a shim, it just wants
> > ethernet encapsulating the payload before adding its own headers to the
> > packet, therefore there is ether_encap.
> > 
> > does this make sense?
> > 
> > ok?
> 
> this shows vlan and etherip using the new functions.

You're still using ifq_enqueue() and ifq_dequeue(). So, I'm not sure to
understand what is the gain of this change.

> Index: net/if_etherip.c
> ===
> RCS file: /cvs/src/sys/net/if_etherip.c,v
> retrieving revision 1.40
> diff -u -p -r1.40 if_etherip.c
> --- net/if_etherip.c  12 Nov 2018 23:57:06 -  1.40
> +++ net/if_etherip.c  30 Nov 2018 02:24:09 -
> @@ -102,7 +102,10 @@ void etheripattach(int);
>  int etherip_clone_create(struct if_clone *, int);
>  int etherip_clone_destroy(struct ifnet *);
>  int etherip_ioctl(struct ifnet *, u_long, caddr_t);
> -void etherip_start(struct ifnet *);
> +int etherip_output(struct ifnet *, struct mbuf *, struct sockaddr *,
> +struct rtentry *rt);
> +void etherip_start(struct ifqueue *);
> +void etherip_send(struct ifnet *, struct mbuf *);
>  int etherip_media_change(struct ifnet *);
>  void etherip_media_status(struct ifnet *, struct ifmediareq *);
>  int etherip_set_tunnel(struct etherip_softc *, struct if_laddrreq *);
> @@ -144,9 +147,10 @@ etherip_clone_create(struct if_clone *if
>   ifp->if_softc = sc;
>   ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
>   ifp->if_ioctl = etherip_ioctl;
> - ifp->if_start = etherip_start;
> + ifp->if_output = etherip_output;
> + ifp->if_qstart = etherip_start;
>   ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
> - ifp->if_xflags = IFXF_CLONED;
> + ifp->if_xflags = IFXF_MPSAFE | IFXF_CLONED;
>   IFQ_SET_MAXLEN(>if_snd, IFQ_MAXLEN);
>   ifp->if_capabilities = IFCAP_VLAN_MTU;
>   ether_fakeaddr(ifp);
> @@ -201,40 +205,63 @@ etherip_media_status(struct ifnet *ifp, 
>  }
>  
>  void
> -etherip_start(struct ifnet *ifp)
> +etherip_send(struct ifnet *ifp, struct mbuf *m)
>  {
>   struct etherip_softc *sc = ifp->if_softc;
> - struct mbuf *m;
>   int error;
> -#if NBPFILTER > 0
> - caddr_t if_bpf;
> -#endif
>  
> - while ((m = ifq_dequeue(>if_snd)) != NULL) {
>  #if NBPFILTER > 0
> - if_bpf = ifp->if_bpf;
> - if (if_bpf)
> - bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
> + caddr_t if_bpf = ifp->if_bpf;
> + if (if_bpf)
> + bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
>  #endif
>  
> - switch (sc->sc_tunnel.t_af) {
> - case AF_INET:
> - error = ip_etherip_output(ifp, m);
> - break;
> + switch (sc->sc_tunnel.t_af) {
> + case AF_INET:
> + error = ip_etherip_output(ifp, m);
> + break;
>  #ifdef INET6
> - case AF_INET6:
> - error = ip6_etherip_output(ifp, m);
> - break;
> + case AF_INET6:
> + error = ip6_etherip_output(ifp, m);
> + break;
>  #endif
> - default:
> - /* unhandled_af(sc->sc_tunnel.t_af); */
> - m_freem(m);
> - continue;
> - }
> + default:
> + /* unhandled_af(sc->sc_tunnel.t_af); */
> + m_freem(m);
> + error = ENETDOWN;
> + break;
> + }
>  
> - if (error)
> - ifp->if_oerrors++;
> + if (error)
> + ifp->if_oerrors++;
> +}
> +
> +void
> +etherip_start(struct ifqueue *ifq)
> +{
> + struct ifnet *ifp = ifq->ifq_if;
> + struct mbuf *m;
> +
> + while ((m = ifq_dequeue(ifq)) != NULL)
> + etherip_send(ifp, m);
> +}
> +
> +int
> +etherip_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
> +struct rtentry *rt)
> +{
> + int error;
> +
> + m = ether_encap(ifp, m, dst, rt, );
> + if (m == NULL)
> + return (error);
> +
> + if (ifq_is_priq(>if_snd)) {
> + etherip_send(ifp, m);
> + return (0);
>   }
> +
> + return (ifq_enqueue(>if_snd, m));
>  }
>  
>  int
> Index: net/if_ethersubr.c
> ===
> RCS file: /cvs/src/sys/net/if_ethersubr.c,v
> retrieving revision 1.253
> diff -u -p -r1.253 if_ethersubr.c
> --- net/if_ethersubr.c13 Mar 2018 01:31:48 -  1.253
> +++ 

Re: split ether_output up into resolution, encapsulation, and output

2018-11-29 Thread David Gwynne
On Fri, Nov 30, 2018 at 12:21:11PM +1000, David Gwynne wrote:
> i have a plan to allow virtual interfaces (eg, vlan, etherip, etc) to
> provide their own output functions so they can bypass the ifq machinery
> and push the packet onto the underlying layer directly.
> 
> they'll still need to get an ethernet header though. vlan needs to get
> the ethernet header and put the vlan shim into it, therefore
> ether_resolve is exposed. etherip doesnt need a shim, it just wants
> ethernet encapsulating the payload before adding its own headers to the
> packet, therefore there is ether_encap.
> 
> does this make sense?
> 
> ok?

this shows vlan and etherip using the new functions.

Index: net/if_etherip.c
===
RCS file: /cvs/src/sys/net/if_etherip.c,v
retrieving revision 1.40
diff -u -p -r1.40 if_etherip.c
--- net/if_etherip.c12 Nov 2018 23:57:06 -  1.40
+++ net/if_etherip.c30 Nov 2018 02:24:09 -
@@ -102,7 +102,10 @@ void etheripattach(int);
 int etherip_clone_create(struct if_clone *, int);
 int etherip_clone_destroy(struct ifnet *);
 int etherip_ioctl(struct ifnet *, u_long, caddr_t);
-void etherip_start(struct ifnet *);
+int etherip_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+struct rtentry *rt);
+void etherip_start(struct ifqueue *);
+void etherip_send(struct ifnet *, struct mbuf *);
 int etherip_media_change(struct ifnet *);
 void etherip_media_status(struct ifnet *, struct ifmediareq *);
 int etherip_set_tunnel(struct etherip_softc *, struct if_laddrreq *);
@@ -144,9 +147,10 @@ etherip_clone_create(struct if_clone *if
ifp->if_softc = sc;
ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
ifp->if_ioctl = etherip_ioctl;
-   ifp->if_start = etherip_start;
+   ifp->if_output = etherip_output;
+   ifp->if_qstart = etherip_start;
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
-   ifp->if_xflags = IFXF_CLONED;
+   ifp->if_xflags = IFXF_MPSAFE | IFXF_CLONED;
IFQ_SET_MAXLEN(>if_snd, IFQ_MAXLEN);
ifp->if_capabilities = IFCAP_VLAN_MTU;
ether_fakeaddr(ifp);
@@ -201,40 +205,63 @@ etherip_media_status(struct ifnet *ifp, 
 }
 
 void
-etherip_start(struct ifnet *ifp)
+etherip_send(struct ifnet *ifp, struct mbuf *m)
 {
struct etherip_softc *sc = ifp->if_softc;
-   struct mbuf *m;
int error;
-#if NBPFILTER > 0
-   caddr_t if_bpf;
-#endif
 
-   while ((m = ifq_dequeue(>if_snd)) != NULL) {
 #if NBPFILTER > 0
-   if_bpf = ifp->if_bpf;
-   if (if_bpf)
-   bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
+   caddr_t if_bpf = ifp->if_bpf;
+   if (if_bpf)
+   bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
 #endif
 
-   switch (sc->sc_tunnel.t_af) {
-   case AF_INET:
-   error = ip_etherip_output(ifp, m);
-   break;
+   switch (sc->sc_tunnel.t_af) {
+   case AF_INET:
+   error = ip_etherip_output(ifp, m);
+   break;
 #ifdef INET6
-   case AF_INET6:
-   error = ip6_etherip_output(ifp, m);
-   break;
+   case AF_INET6:
+   error = ip6_etherip_output(ifp, m);
+   break;
 #endif
-   default:
-   /* unhandled_af(sc->sc_tunnel.t_af); */
-   m_freem(m);
-   continue;
-   }
+   default:
+   /* unhandled_af(sc->sc_tunnel.t_af); */
+   m_freem(m);
+   error = ENETDOWN;
+   break;
+   }
 
-   if (error)
-   ifp->if_oerrors++;
+   if (error)
+   ifp->if_oerrors++;
+}
+
+void
+etherip_start(struct ifqueue *ifq)
+{
+   struct ifnet *ifp = ifq->ifq_if;
+   struct mbuf *m;
+
+   while ((m = ifq_dequeue(ifq)) != NULL)
+   etherip_send(ifp, m);
+}
+
+int
+etherip_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+struct rtentry *rt)
+{
+   int error;
+
+   m = ether_encap(ifp, m, dst, rt, );
+   if (m == NULL)
+   return (error);
+
+   if (ifq_is_priq(>if_snd)) {
+   etherip_send(ifp, m);
+   return (0);
}
+
+   return (ifq_enqueue(>if_snd, m));
 }
 
 int
Index: net/if_ethersubr.c
===
RCS file: /cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.253
diff -u -p -r1.253 if_ethersubr.c
--- net/if_ethersubr.c  13 Mar 2018 01:31:48 -  1.253
+++ net/if_ethersubr.c  30 Nov 2018 02:24:09 -
@@ -483,7 +510,8 @@ ether_ifattach(struct ifnet *ifp)
ifp->if_addrlen = ETHER_ADDR_LEN;
ifp->if_hdrlen = ETHER_HDR_LEN;
ifp->if_mtu = ETHERMTU;
-   ifp->if_output = ether_output;
+   if (ifp->if_output == NULL)
+