On 18/12/18(Tue) 12:13, Denis Fondras wrote:
> Here is a serie of diffs to enable MPLSv6, MPLS transport over IPv6.

Nice!

> First diff : allow mpe(4) to handle IPv6 trafic.

Comments below
 
> Index: net/if_ethersubr.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if_ethersubr.c,v
> retrieving revision 1.255
> diff -u -p -r1.255 if_ethersubr.c
> --- net/if_ethersubr.c        12 Dec 2018 05:38:26 -0000      1.255
> +++ net/if_ethersubr.c        18 Dec 2018 11:03:33 -0000
> @@ -246,18 +246,28 @@ ether_resolve(struct ifnet *ifp, struct 
>                           sizeof(eh->ether_dhost));
>                       break;
>  #ifdef INET6
> +do_v6:
>               case AF_INET6:
>                       error = nd6_resolve(ifp, rt, m, dst, eh->ether_dhost);
>                       if (error)
>                               return (error);
>                       break;
>  #endif
> +do_v4:
>               case AF_INET:
> -             case AF_MPLS:
>                       error = arpresolve(ifp, rt, m, dst, eh->ether_dhost);
>                       if (error)
>                               return (error);
>                       break;
> +             case AF_MPLS:
> +                     switch (rt->rt_gateway->sa_family) {
> +                     case AF_INET:
> +                             goto do_v4;
> +#ifdef INET6
> +                     case AF_INET6:
> +                             goto do_v6;
> +#endif
> +                     }
>               default:
>                       senderr(EHOSTUNREACH);
>               }

This makes ether_resolve() (aka ether_output()) even more spaghetti for MPLS.

I know this is not your code, but is it possible to do better than that?
The current logic hides an encapsulation.  For example would it be possible
to call ether_resolve() recursively in the AF_MPLS case?

> Index: net/if_mpe.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if_mpe.c,v
> retrieving revision 1.64
> diff -u -p -r1.64 if_mpe.c
> --- net/if_mpe.c      9 Jan 2018 15:24:24 -0000       1.64
> +++ net/if_mpe.c      18 Dec 2018 11:03:33 -0000
> @@ -85,7 +85,7 @@ mpe_clone_create(struct if_clone *ifc, i
>       mpeif->sc_unit = unit;
>       ifp = &mpeif->sc_if;
>       snprintf(ifp->if_xname, sizeof ifp->if_xname, "mpe%d", unit);
> -     ifp->if_flags = IFF_POINTOPOINT;
> +     ifp->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;

You added this flag because the nd6 codes relies on it, right?  But does
this reflect reality?

>       ifp->if_xflags = IFXF_CLONED;
>       ifp->if_softc = mpeif;
>       ifp->if_mtu = MPE_MTU;
> @@ -157,6 +157,16 @@ mpestart(struct ifnet *ifp0)
>                           sizeof(in_addr_t));
>                       m_adj(m, sizeof(in_addr_t));
>                       break;
> +#ifdef INET6
> +             case AF_INET6:
> +                     memset(sa, 0, sizeof(struct sockaddr_in6));
> +                     satosin6(sa)->sin6_family = af;
> +                     satosin6(sa)->sin6_len = sizeof(struct sockaddr_in6);
> +                     bcopy(mtod(m, caddr_t), &satosin6(sa)->sin6_addr,
> +                         sizeof(struct in6_addr));
> +                     m_adj(m, sizeof(struct in6_addr));
> +                     break;
> +#endif
>               default:
>                       m_freem(m);
>                       continue;
> @@ -204,6 +214,9 @@ mpeoutput(struct ifnet *ifp, struct mbuf
>       int             error;
>       int             off;
>       in_addr_t       addr;
> +#ifdef INET6
> +     struct in6_addr addr6;
> +#endif
>       u_int8_t        op = 0;
>  
>  #ifdef DIAGNOSTIC
> @@ -251,6 +264,39 @@ mpeoutput(struct ifnet *ifp, struct mbuf
>               m_copyback(m, sizeof(sa_family_t), sizeof(in_addr_t),
>                   &addr, M_NOWAIT);
>               break;
> +#ifdef INET6
> +     case AF_INET6:
> +             if (!rt || !(rt->rt_flags & RTF_MPLS)) {

I know you copied past, but checking for rt != NULL should be avoided.
You should use rtisvalid().  Could you please merge the blocks related
to rtentry.  I general if you can reduce the copy/paste between v4 & v6
that would be great!

Another style neat, we use the ISSET() macro for checking RTF_* flags ;)

> +                     m_freem(m);
> +                     error = ENETUNREACH;
> +                     goto out;
> +             }
> +             shim.shim_label = ((struct rt_mpls *)rt->rt_llinfo)->mpls_label;
> +             shim.shim_label |= MPLS_BOS_MASK;
> +             op =  ((struct rt_mpls *)rt->rt_llinfo)->mpls_operation;
> +             if (op != MPLS_OP_PUSH) {
> +                     m_freem(m);
> +                     error = ENETUNREACH;
> +                     goto out;
> +             }
> +             if (mpls_mapttl_ip) {
> +                     struct ip6_hdr  *ip6;
> +                     ip6 = mtod(m, struct ip6_hdr *);
> +                     shim.shim_label |= htonl(ip6->ip6_hops) & MPLS_TTL_MASK;
> +             } else
> +                     shim.shim_label |= htonl(mpls_defttl) & MPLS_TTL_MASK;
> +             off = sizeof(sa_family_t) + sizeof(struct in6_addr);
> +             M_PREPEND(m, sizeof(shim) + off, M_DONTWAIT);
> +             if (m == NULL) {
> +                     error = ENOBUFS;
> +                     goto out;
> +             }
> +             *mtod(m, sa_family_t *) = AF_INET6;
> +             addr6 = satosin6(rt->rt_gateway)->sin6_addr;
> +             m_copyback(m, sizeof(sa_family_t), sizeof(struct in6_addr),
> +                 &addr6, M_NOWAIT);
> +             break;
> +#endif
>       default:
>               m_freem(m);
>               error = EPFNOSUPPORT;
> @@ -354,6 +400,9 @@ mpeioctl(struct ifnet *ifp, u_long cmd, 
>               }
>               /* return with ENOTTY so that the parent handler finishes */
>               return (ENOTTY);
> +     case SIOCADDMULTI:
> +     case SIOCDELMULTI:
> +             break;

Why do you need these handlers?  This is hiding something wrong :)

>       default:
>               return (ENOTTY);
>       }
> Index: netinet/icmp6.h
> ===================================================================
> RCS file: /cvs/src/sys/netinet/icmp6.h,v
> retrieving revision 1.47
> diff -u -p -r1.47 icmp6.h
> --- netinet/icmp6.h   8 Aug 2017 18:15:58 -0000       1.47
> +++ netinet/icmp6.h   18 Dec 2018 11:03:33 -0000
> @@ -610,16 +610,17 @@ struct  rtentry;
>  struct       rttimer;
>  struct       in6_multi;
>  
> -void icmp6_init(void);
> -void icmp6_paramerror(struct mbuf *, int);
> -void icmp6_error(struct mbuf *, int, int, int);
> -int  icmp6_input(struct mbuf **, int *, int, int);
> -void icmp6_fasttimo(void);
> -void icmp6_reflect(struct mbuf *, size_t);
> -void icmp6_prepare(struct mbuf *);
> -void icmp6_redirect_input(struct mbuf *, int);
> -void icmp6_redirect_output(struct mbuf *, struct rtentry *);
> -int  icmp6_sysctl(int *, u_int, void *, size_t *, void *, size_t);
> +void          icmp6_init(void);
> +void          icmp6_paramerror(struct mbuf *, int);
> +struct mbuf  *icmp6_do_error(struct mbuf *, int, int, int);
> +void          icmp6_error(struct mbuf *, int, int, int);
> +int           icmp6_input(struct mbuf **, int *, int, int);
> +void          icmp6_fasttimo(void);
> +int           icmp6_reflect(struct mbuf *, size_t, u_int32_t);
> +void          icmp6_prepare(struct mbuf *);
> +void          icmp6_redirect_input(struct mbuf *, int);
> +void          icmp6_redirect_output(struct mbuf *, struct rtentry *);
> +int           icmp6_sysctl(int *, u_int, void *, size_t *, void *, size_t);
>  
>  struct       ip6ctlparam;
>  void icmp6_mtudisc_update(struct ip6ctlparam *, int);

Introducing icmp6_do_error() could be committed as a separated diff.  At
least for coherency with v4 :)

> Index: netinet6/icmp6.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet6/icmp6.c,v
> retrieving revision 1.228
> diff -u -p -r1.228 icmp6.c
> --- netinet6/icmp6.c  10 Dec 2018 23:00:01 -0000      1.228
> +++ netinet6/icmp6.c  18 Dec 2018 11:03:33 -0000
> @@ -102,6 +102,10 @@
>  #include <net/pfvar.h>
>  #endif
>  
> +#ifdef MPLS
> +#include <netmpls/mpls.h>
> +#endif

Can you please find another way than spreading an #ifdef MPLS in IPv6
code?

> +
>  struct cpumem *icmp6counters;
>  
>  extern int icmp6errppslim;
> @@ -231,11 +235,8 @@ icmp6_mtudisc_callback_register(void (*f
>       LIST_INSERT_HEAD(&icmp6_mtudisc_callbacks, mc, mc_list);
>  }
>  
> -/*
> - * Generate an error packet of type error in response to bad IP6 packet.
> - */
> -void
> -icmp6_error(struct mbuf *m, int type, int code, int param)
> +struct mbuf *
> +icmp6_do_error(struct mbuf *m, int type, int code, int param)
>  {
>       struct ip6_hdr *oip6, *nip6;
>       struct icmp6_hdr *icmp6;
> @@ -250,8 +251,9 @@ icmp6_error(struct mbuf *m, int type, in
>  
>       if (m->m_len < sizeof(struct ip6_hdr)) {
>               m = m_pullup(m, sizeof(struct ip6_hdr));
> -             if (m == NULL)
> -                     return;
> +             if (m == NULL) {
> +                     return (NULL);
> +             }
>       }
>       oip6 = mtod(m, struct ip6_hdr *);
>  
> @@ -294,7 +296,7 @@ icmp6_error(struct mbuf *m, int type, in
>                       sizeof(*icp));
>               if (icp == NULL) {
>                       icmp6stat_inc(icp6s_tooshort);
> -                     return;
> +                     return (NULL);
>               }
>               if (icp->icmp6_type < ICMP6_ECHO_REQUEST ||
>                   icp->icmp6_type == ND_REDIRECT) {
> @@ -334,7 +336,7 @@ icmp6_error(struct mbuf *m, int type, in
>               m = m_pullup(m, preplen);
>       if (m == NULL) {
>               nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__));
> -             return;
> +             return (NULL);
>       }
>  
>       nip6 = mtod(m, struct ip6_hdr *);
> @@ -361,18 +363,33 @@ icmp6_error(struct mbuf *m, int type, in
>       m->m_pkthdr.ph_ifidx = 0;
>  
>       icmp6stat_inc(icp6s_outhist + type);
> -     icmp6_reflect(m, sizeof(struct ip6_hdr)); /* header order: IPv6 - 
> ICMPv6 */
>  
> -     return;
> +     return (m);
>  
>    freeit:
>       /*
>        * If we can't tell wheter or not we can generate ICMP6, free it.
>        */
> -     m_freem(m);
> +     return(m_freem(m));
>  }
>  
>  /*
> + * Generate an error packet of type error in response to bad IP6 packet.
> + */
> +void
> +icmp6_error(struct mbuf *m, int type, int code, int param)
> +{
> +     struct mbuf     *n;
> +
> +     n = icmp6_do_error(m, type, code, param);
> +     if (n != NULL) {
> +             /* header order: IPv6 - ICMPv6 */
> +             if (!icmp6_reflect(n, sizeof(struct ip6_hdr), -1))
> +                     ip6_send(n);
> +     }
> +}                                                                    
> +
> +/*
>   * Process a received ICMP6 message.
>   */
>  int
> @@ -597,7 +614,8 @@ icmp6_input(struct mbuf **mp, int *offp,
>                       nicmp6->icmp6_code = 0;
>                       icmp6stat_inc(icp6s_reflect);
>                       icmp6stat_inc(icp6s_outhist + ICMP6_ECHO_REPLY);
> -                     icmp6_reflect(n, noff);
> +                     if (!icmp6_reflect(n, noff, -1))
> +                             ip6_send(n);
>               }
>               if (!m)
>                       goto freeit;
> @@ -1030,15 +1048,17 @@ icmp6_mtudisc_update(struct ip6ctlparam 
>   * Reflect the ip6 packet back to the source.
>   * OFF points to the icmp6 header, counted from the top of the mbuf.
>   */
> -void
> -icmp6_reflect(struct mbuf *m, size_t off)
> +int
> +icmp6_reflect(struct mbuf *m, size_t off, u_int32_t mpls_label)
>  {
>       struct rtentry *rt = NULL;
>       struct ip6_hdr *ip6;
>       struct icmp6_hdr *icmp6;
>       struct in6_addr t, *src = NULL;
>       struct sockaddr_in6 sa6_src, sa6_dst;
> +     struct in6_ifaddr *ia6;
>       u_int rtableid;
> +     char addr[INET6_ADDRSTRLEN];
>  
>       CTASSERT(sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) <= MHLEN);
>  
> @@ -1051,8 +1071,10 @@ icmp6_reflect(struct mbuf *m, size_t off
>               goto bad;
>       }
>  
> -     if (m->m_pkthdr.ph_loopcnt++ >= M_MAXLOOP)
> -             goto bad;
> +     if (m->m_pkthdr.ph_loopcnt++ >= M_MAXLOOP) {
> +             m_freem(m);
> +             return (ELOOP);
> +     }
>       rtableid = m->m_pkthdr.ph_rtableid;
>       m_resethdr(m);
>       m->m_pkthdr.ph_rtableid = rtableid;
> @@ -1071,7 +1093,7 @@ icmp6_reflect(struct mbuf *m, size_t off
>               l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
>               if (m->m_len < l) {
>                       if ((m = m_pullup(m, l)) == NULL)
> -                             return;
> +                             return (EMSGSIZE);
>               }
>               memcpy(mtod(m, caddr_t), &nip6, sizeof(nip6));
>       } else /* off == sizeof(struct ip6_hdr) */ {
> @@ -1079,7 +1101,7 @@ icmp6_reflect(struct mbuf *m, size_t off
>               l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
>               if (m->m_len < l) {
>                       if ((m = m_pullup(m, l)) == NULL)
> -                             return;
> +                             return (EMSGSIZE);
>               }
>       }
>       ip6 = mtod(m, struct ip6_hdr *);
> @@ -1094,8 +1116,8 @@ icmp6_reflect(struct mbuf *m, size_t off
>       ip6->ip6_dst = ip6->ip6_src;
>  
>       /*
> -      * XXX: make sure to embed scope zone information, using
> -      * already embedded IDs or the received interface (if any).
> +      * XXX: make sure to embed scope zone information, using already
> +      * embedded IDs or the received interface (if any).
>        * Note that rcvif may be NULL.
>        * TODO: scoped routing case (XXX).
>        */
> @@ -1107,39 +1129,23 @@ icmp6_reflect(struct mbuf *m, size_t off
>       sa6_dst.sin6_family = AF_INET6;
>       sa6_dst.sin6_len = sizeof(sa6_dst);
>       sa6_dst.sin6_addr = t;
> +     
> +#ifdef MPLS
> +     if (mpls_label != -1) {
> +             struct sockaddr_mpls    sa_mpls, *smpls;
> +             /* set ip6_src to something usable, based on the MPLS label */
> +             bzero(&sa_mpls, sizeof(sa_mpls));
> +             smpls = &sa_mpls;
> +             smpls->smpls_family = AF_MPLS;
> +             smpls->smpls_len = sizeof(*smpls);
> +             smpls->smpls_label = mpls_label;
>  
> -     /*
> -      * If the incoming packet was addressed directly to us (i.e. unicast),
> -      * use dst as the src for the reply.
> -      * The IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED case would be VERY rare,
> -      * but is possible (for example) when we encounter an error while
> -      * forwarding procedure destined to a duplicated address of ours.
> -      */
> -     rt = rtalloc(sin6tosa(&sa6_dst), 0, rtableid);
> -     if (rtisvalid(rt) && ISSET(rt->rt_flags, RTF_LOCAL) &&
> -         !ISSET(ifatoia6(rt->rt_ifa)->ia6_flags,
> -         IN6_IFF_ANYCAST|IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED)) {
> -             src = &t;
> -     }
> -     rtfree(rt);
> -     rt = NULL;
> -
> -     if (src == NULL) {
> -             struct in6_ifaddr *ia6;
> -
> -             /*
> -              * This case matches to multicasts, our anycast, or unicasts
> -              * that we do not own.  Select a source address based on the
> -              * source address of the erroneous packet.
> -              */
> -             rt = rtalloc(sin6tosa(&sa6_src), RT_RESOLVE, rtableid);
> +             rt = rtalloc(smplstosa(smpls), RT_RESOLVE, rtableid);
>               if (!rtisvalid(rt)) {
> -                     char addr[INET6_ADDRSTRLEN];
> -
>                       nd6log((LOG_DEBUG,
>                           "%s: source can't be determined: dst=%s\n",
>                           __func__, inet_ntop(AF_INET6, &sa6_src.sin6_addr,
> -                             addr, sizeof(addr))));
> +                         addr, sizeof(addr))));
>                       rtfree(rt);
>                       goto bad;
>               }
> @@ -1149,6 +1155,48 @@ icmp6_reflect(struct mbuf *m, size_t off
>               if (src == NULL)
>                       src = &ifatoia6(rt->rt_ifa)->ia_addr.sin6_addr;
>       }
> +#endif
> +     if (src == NULL) {
> +             /*
> +              * If the incoming packet was addressed directly to us (i.e.
> +              * unicast), use dst as the src for the reply. The
> +              * IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED case would be VERY rare,
> +              * but is possible (for example) when we encounter an error
> +              * while forwarding procedure destined to a duplicated address
> +              * of ours.
> +              */
> +             rt = rtalloc(sin6tosa(&sa6_dst), 0, rtableid);
> +             if (rtisvalid(rt) && ISSET(rt->rt_flags, RTF_LOCAL) &&
> +                 !ISSET(ifatoia6(rt->rt_ifa)->ia6_flags,
> +                 IN6_IFF_ANYCAST|IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED)) {
> +                     src = &t;
> +             }
> +             rtfree(rt);
> +             rt = NULL;
> +     
> +             if (src == NULL) {
> +                     /*
> +                      * This case matches to multicasts, our anycast, or
> +                      * unicasts that we do not own. Select a source address
> +                      * based on the source address of the erroneous packet.
> +                      */
> +                     rt = rtalloc(sin6tosa(&sa6_src), RT_RESOLVE, rtableid);
> +                     if (!rtisvalid(rt)) {
> +                             nd6log((LOG_DEBUG,
> +                                 "%s: source can't be determined: dst=%s\n",
> +                                 __func__, inet_ntop(AF_INET6,
> +                                 &sa6_src.sin6_addr, addr, sizeof(addr))));
> +                             rtfree(rt);
> +                             goto bad;
> +                     }
> +                     ia6 = in6_ifawithscope(rt->rt_ifa->ifa_ifp, &t,
> +                         rtableid);
> +                     if (ia6 != NULL)
> +                             src = &ia6->ia_addr.sin6_addr;
> +                     if (src == NULL)
> +                             src = &ifatoia6(rt->rt_ifa)->ia_addr.sin6_addr;
> +             }
> +     }
>  
>       ip6->ip6_src = *src;
>       rtfree(rt);
> @@ -1167,13 +1215,11 @@ icmp6_reflect(struct mbuf *m, size_t off
>        */
>  
>       m->m_flags &= ~(M_BCAST|M_MCAST);
> -
> -     ip6_send(m);
> -     return;
> +     return (0);
>  
>   bad:
>       m_freem(m);
> -     return;
> +     return (EHOSTUNREACH);
>  }
>  
>  void
> Index: netmpls/mpls_input.c
> ===================================================================
> RCS file: /cvs/src/sys/netmpls/mpls_input.c,v
> retrieving revision 1.68
> diff -u -p -r1.68 mpls_input.c
> --- netmpls/mpls_input.c      12 Jan 2018 06:57:56 -0000      1.68
> +++ netmpls/mpls_input.c      18 Dec 2018 11:03:33 -0000
> @@ -36,6 +36,8 @@
>  
>  #ifdef INET6
>  #include <netinet/ip6.h>
> +#include <netinet6/ip6_var.h>
> +#include <netinet/icmp6.h>
>  #endif /* INET6 */
>  
>  #include <netmpls/mpls.h>
> @@ -200,8 +202,19 @@ do_v6:
>  #if NMPE > 0
>               if (ifp->if_type == IFT_MPLS) {
>                       smpls = satosmpls(rt_key(rt));
> -                     mpe_input(m, ifp, smpls, ttl);
> -                     goto done;
> +                     if (m->m_len < sizeof(u_char) &&
> +                         (m = m_pullup(m, sizeof(u_char))) == NULL)
> +                             return;
> +                     switch (*mtod(m, u_char *) >> 4) {
> +                     case IPVERSION:
> +                             mpe_input(m, ifp, smpls, ttl);
> +                             goto done;
> +#ifdef INET6
> +                     case IPV6_VERSION >> 4:
> +                             mpe_input6(m, ifp, smpls, ttl);
> +                             goto done;
> +#endif
> +                     }
>               }
>  #endif
>               if (ifp->if_type == IFT_MPLSTUNNEL) {
> @@ -345,6 +358,9 @@ mpls_do_error(struct mbuf *m, int type, 
>       struct icmp *icp;
>       struct ip *ip;
>       int nstk, error;
> +#ifdef INET6
> +     struct ip6_hdr          *ip6;
> +#endif
>  
>       for (nstk = 0; nstk < MPLS_INKERNEL_LOOP_MAX; nstk++) {
>               if (m->m_len < sizeof(*shim) &&
> @@ -419,6 +435,21 @@ mpls_do_error(struct mbuf *m, int type, 
>               break;
>  #ifdef INET6
>       case IPV6_VERSION >> 4:
> +             m = icmp6_do_error(m, ICMP6_TIME_EXCEEDED,
> +                 ICMP6_TIME_EXCEED_TRANSIT, 0);
> +             if (m == NULL)
> +                     return (NULL);
> +
> +             error = icmp6_reflect(m, sizeof(struct ip6_hdr),
> +                 shim->shim_label & MPLS_LABEL_MASK);
> +             if (error)
> +                     return (NULL);
> +
> +             ip6 = mtod(m, struct ip6_hdr *);
> +             ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr));
> +             m->m_pkthdr.csum_flags |= M_ICMP_CSUM_OUT;
> +             in6_proto_cksum_out(m, NULL);
> +             break;
>  #endif
>       default:
>               m_freem(m);
> 

Reply via email to