On 18/12/18(Tue) 12:13, Denis Fondras wrote: > Here is a serie of diffs to enable MPLSv6, MPLS transport over IPv6.
Nice! > First diff : allow mpe(4) to handle IPv6 trafic. Comments below > Index: net/if_ethersubr.c > =================================================================== > RCS file: /cvs/src/sys/net/if_ethersubr.c,v > retrieving revision 1.255 > diff -u -p -r1.255 if_ethersubr.c > --- net/if_ethersubr.c 12 Dec 2018 05:38:26 -0000 1.255 > +++ net/if_ethersubr.c 18 Dec 2018 11:03:33 -0000 > @@ -246,18 +246,28 @@ ether_resolve(struct ifnet *ifp, struct > sizeof(eh->ether_dhost)); > break; > #ifdef INET6 > +do_v6: > case AF_INET6: > error = nd6_resolve(ifp, rt, m, dst, eh->ether_dhost); > if (error) > return (error); > break; > #endif > +do_v4: > case AF_INET: > - case AF_MPLS: > error = arpresolve(ifp, rt, m, dst, eh->ether_dhost); > if (error) > return (error); > break; > + case AF_MPLS: > + switch (rt->rt_gateway->sa_family) { > + case AF_INET: > + goto do_v4; > +#ifdef INET6 > + case AF_INET6: > + goto do_v6; > +#endif > + } > default: > senderr(EHOSTUNREACH); > } This makes ether_resolve() (aka ether_output()) even more spaghetti for MPLS. I know this is not your code, but is it possible to do better than that? The current logic hides an encapsulation. For example would it be possible to call ether_resolve() recursively in the AF_MPLS case? > Index: net/if_mpe.c > =================================================================== > RCS file: /cvs/src/sys/net/if_mpe.c,v > retrieving revision 1.64 > diff -u -p -r1.64 if_mpe.c > --- net/if_mpe.c 9 Jan 2018 15:24:24 -0000 1.64 > +++ net/if_mpe.c 18 Dec 2018 11:03:33 -0000 > @@ -85,7 +85,7 @@ mpe_clone_create(struct if_clone *ifc, i > mpeif->sc_unit = unit; > ifp = &mpeif->sc_if; > snprintf(ifp->if_xname, sizeof ifp->if_xname, "mpe%d", unit); > - ifp->if_flags = IFF_POINTOPOINT; > + ifp->if_flags = IFF_POINTOPOINT|IFF_MULTICAST; You added this flag because the nd6 codes relies on it, right? But does this reflect reality? > ifp->if_xflags = IFXF_CLONED; > ifp->if_softc = mpeif; > ifp->if_mtu = MPE_MTU; > @@ -157,6 +157,16 @@ mpestart(struct ifnet *ifp0) > sizeof(in_addr_t)); > m_adj(m, sizeof(in_addr_t)); > break; > +#ifdef INET6 > + case AF_INET6: > + memset(sa, 0, sizeof(struct sockaddr_in6)); > + satosin6(sa)->sin6_family = af; > + satosin6(sa)->sin6_len = sizeof(struct sockaddr_in6); > + bcopy(mtod(m, caddr_t), &satosin6(sa)->sin6_addr, > + sizeof(struct in6_addr)); > + m_adj(m, sizeof(struct in6_addr)); > + break; > +#endif > default: > m_freem(m); > continue; > @@ -204,6 +214,9 @@ mpeoutput(struct ifnet *ifp, struct mbuf > int error; > int off; > in_addr_t addr; > +#ifdef INET6 > + struct in6_addr addr6; > +#endif > u_int8_t op = 0; > > #ifdef DIAGNOSTIC > @@ -251,6 +264,39 @@ mpeoutput(struct ifnet *ifp, struct mbuf > m_copyback(m, sizeof(sa_family_t), sizeof(in_addr_t), > &addr, M_NOWAIT); > break; > +#ifdef INET6 > + case AF_INET6: > + if (!rt || !(rt->rt_flags & RTF_MPLS)) { I know you copied past, but checking for rt != NULL should be avoided. You should use rtisvalid(). Could you please merge the blocks related to rtentry. I general if you can reduce the copy/paste between v4 & v6 that would be great! Another style neat, we use the ISSET() macro for checking RTF_* flags ;) > + m_freem(m); > + error = ENETUNREACH; > + goto out; > + } > + shim.shim_label = ((struct rt_mpls *)rt->rt_llinfo)->mpls_label; > + shim.shim_label |= MPLS_BOS_MASK; > + op = ((struct rt_mpls *)rt->rt_llinfo)->mpls_operation; > + if (op != MPLS_OP_PUSH) { > + m_freem(m); > + error = ENETUNREACH; > + goto out; > + } > + if (mpls_mapttl_ip) { > + struct ip6_hdr *ip6; > + ip6 = mtod(m, struct ip6_hdr *); > + shim.shim_label |= htonl(ip6->ip6_hops) & MPLS_TTL_MASK; > + } else > + shim.shim_label |= htonl(mpls_defttl) & MPLS_TTL_MASK; > + off = sizeof(sa_family_t) + sizeof(struct in6_addr); > + M_PREPEND(m, sizeof(shim) + off, M_DONTWAIT); > + if (m == NULL) { > + error = ENOBUFS; > + goto out; > + } > + *mtod(m, sa_family_t *) = AF_INET6; > + addr6 = satosin6(rt->rt_gateway)->sin6_addr; > + m_copyback(m, sizeof(sa_family_t), sizeof(struct in6_addr), > + &addr6, M_NOWAIT); > + break; > +#endif > default: > m_freem(m); > error = EPFNOSUPPORT; > @@ -354,6 +400,9 @@ mpeioctl(struct ifnet *ifp, u_long cmd, > } > /* return with ENOTTY so that the parent handler finishes */ > return (ENOTTY); > + case SIOCADDMULTI: > + case SIOCDELMULTI: > + break; Why do you need these handlers? This is hiding something wrong :) > default: > return (ENOTTY); > } > Index: netinet/icmp6.h > =================================================================== > RCS file: /cvs/src/sys/netinet/icmp6.h,v > retrieving revision 1.47 > diff -u -p -r1.47 icmp6.h > --- netinet/icmp6.h 8 Aug 2017 18:15:58 -0000 1.47 > +++ netinet/icmp6.h 18 Dec 2018 11:03:33 -0000 > @@ -610,16 +610,17 @@ struct rtentry; > struct rttimer; > struct in6_multi; > > -void icmp6_init(void); > -void icmp6_paramerror(struct mbuf *, int); > -void icmp6_error(struct mbuf *, int, int, int); > -int icmp6_input(struct mbuf **, int *, int, int); > -void icmp6_fasttimo(void); > -void icmp6_reflect(struct mbuf *, size_t); > -void icmp6_prepare(struct mbuf *); > -void icmp6_redirect_input(struct mbuf *, int); > -void icmp6_redirect_output(struct mbuf *, struct rtentry *); > -int icmp6_sysctl(int *, u_int, void *, size_t *, void *, size_t); > +void icmp6_init(void); > +void icmp6_paramerror(struct mbuf *, int); > +struct mbuf *icmp6_do_error(struct mbuf *, int, int, int); > +void icmp6_error(struct mbuf *, int, int, int); > +int icmp6_input(struct mbuf **, int *, int, int); > +void icmp6_fasttimo(void); > +int icmp6_reflect(struct mbuf *, size_t, u_int32_t); > +void icmp6_prepare(struct mbuf *); > +void icmp6_redirect_input(struct mbuf *, int); > +void icmp6_redirect_output(struct mbuf *, struct rtentry *); > +int icmp6_sysctl(int *, u_int, void *, size_t *, void *, size_t); > > struct ip6ctlparam; > void icmp6_mtudisc_update(struct ip6ctlparam *, int); Introducing icmp6_do_error() could be committed as a separated diff. At least for coherency with v4 :) > Index: netinet6/icmp6.c > =================================================================== > RCS file: /cvs/src/sys/netinet6/icmp6.c,v > retrieving revision 1.228 > diff -u -p -r1.228 icmp6.c > --- netinet6/icmp6.c 10 Dec 2018 23:00:01 -0000 1.228 > +++ netinet6/icmp6.c 18 Dec 2018 11:03:33 -0000 > @@ -102,6 +102,10 @@ > #include <net/pfvar.h> > #endif > > +#ifdef MPLS > +#include <netmpls/mpls.h> > +#endif Can you please find another way than spreading an #ifdef MPLS in IPv6 code? > + > struct cpumem *icmp6counters; > > extern int icmp6errppslim; > @@ -231,11 +235,8 @@ icmp6_mtudisc_callback_register(void (*f > LIST_INSERT_HEAD(&icmp6_mtudisc_callbacks, mc, mc_list); > } > > -/* > - * Generate an error packet of type error in response to bad IP6 packet. > - */ > -void > -icmp6_error(struct mbuf *m, int type, int code, int param) > +struct mbuf * > +icmp6_do_error(struct mbuf *m, int type, int code, int param) > { > struct ip6_hdr *oip6, *nip6; > struct icmp6_hdr *icmp6; > @@ -250,8 +251,9 @@ icmp6_error(struct mbuf *m, int type, in > > if (m->m_len < sizeof(struct ip6_hdr)) { > m = m_pullup(m, sizeof(struct ip6_hdr)); > - if (m == NULL) > - return; > + if (m == NULL) { > + return (NULL); > + } > } > oip6 = mtod(m, struct ip6_hdr *); > > @@ -294,7 +296,7 @@ icmp6_error(struct mbuf *m, int type, in > sizeof(*icp)); > if (icp == NULL) { > icmp6stat_inc(icp6s_tooshort); > - return; > + return (NULL); > } > if (icp->icmp6_type < ICMP6_ECHO_REQUEST || > icp->icmp6_type == ND_REDIRECT) { > @@ -334,7 +336,7 @@ icmp6_error(struct mbuf *m, int type, in > m = m_pullup(m, preplen); > if (m == NULL) { > nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__)); > - return; > + return (NULL); > } > > nip6 = mtod(m, struct ip6_hdr *); > @@ -361,18 +363,33 @@ icmp6_error(struct mbuf *m, int type, in > m->m_pkthdr.ph_ifidx = 0; > > icmp6stat_inc(icp6s_outhist + type); > - icmp6_reflect(m, sizeof(struct ip6_hdr)); /* header order: IPv6 - > ICMPv6 */ > > - return; > + return (m); > > freeit: > /* > * If we can't tell wheter or not we can generate ICMP6, free it. > */ > - m_freem(m); > + return(m_freem(m)); > } > > /* > + * Generate an error packet of type error in response to bad IP6 packet. > + */ > +void > +icmp6_error(struct mbuf *m, int type, int code, int param) > +{ > + struct mbuf *n; > + > + n = icmp6_do_error(m, type, code, param); > + if (n != NULL) { > + /* header order: IPv6 - ICMPv6 */ > + if (!icmp6_reflect(n, sizeof(struct ip6_hdr), -1)) > + ip6_send(n); > + } > +} > + > +/* > * Process a received ICMP6 message. > */ > int > @@ -597,7 +614,8 @@ icmp6_input(struct mbuf **mp, int *offp, > nicmp6->icmp6_code = 0; > icmp6stat_inc(icp6s_reflect); > icmp6stat_inc(icp6s_outhist + ICMP6_ECHO_REPLY); > - icmp6_reflect(n, noff); > + if (!icmp6_reflect(n, noff, -1)) > + ip6_send(n); > } > if (!m) > goto freeit; > @@ -1030,15 +1048,17 @@ icmp6_mtudisc_update(struct ip6ctlparam > * Reflect the ip6 packet back to the source. > * OFF points to the icmp6 header, counted from the top of the mbuf. > */ > -void > -icmp6_reflect(struct mbuf *m, size_t off) > +int > +icmp6_reflect(struct mbuf *m, size_t off, u_int32_t mpls_label) > { > struct rtentry *rt = NULL; > struct ip6_hdr *ip6; > struct icmp6_hdr *icmp6; > struct in6_addr t, *src = NULL; > struct sockaddr_in6 sa6_src, sa6_dst; > + struct in6_ifaddr *ia6; > u_int rtableid; > + char addr[INET6_ADDRSTRLEN]; > > CTASSERT(sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) <= MHLEN); > > @@ -1051,8 +1071,10 @@ icmp6_reflect(struct mbuf *m, size_t off > goto bad; > } > > - if (m->m_pkthdr.ph_loopcnt++ >= M_MAXLOOP) > - goto bad; > + if (m->m_pkthdr.ph_loopcnt++ >= M_MAXLOOP) { > + m_freem(m); > + return (ELOOP); > + } > rtableid = m->m_pkthdr.ph_rtableid; > m_resethdr(m); > m->m_pkthdr.ph_rtableid = rtableid; > @@ -1071,7 +1093,7 @@ icmp6_reflect(struct mbuf *m, size_t off > l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); > if (m->m_len < l) { > if ((m = m_pullup(m, l)) == NULL) > - return; > + return (EMSGSIZE); > } > memcpy(mtod(m, caddr_t), &nip6, sizeof(nip6)); > } else /* off == sizeof(struct ip6_hdr) */ { > @@ -1079,7 +1101,7 @@ icmp6_reflect(struct mbuf *m, size_t off > l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); > if (m->m_len < l) { > if ((m = m_pullup(m, l)) == NULL) > - return; > + return (EMSGSIZE); > } > } > ip6 = mtod(m, struct ip6_hdr *); > @@ -1094,8 +1116,8 @@ icmp6_reflect(struct mbuf *m, size_t off > ip6->ip6_dst = ip6->ip6_src; > > /* > - * XXX: make sure to embed scope zone information, using > - * already embedded IDs or the received interface (if any). > + * XXX: make sure to embed scope zone information, using already > + * embedded IDs or the received interface (if any). > * Note that rcvif may be NULL. > * TODO: scoped routing case (XXX). > */ > @@ -1107,39 +1129,23 @@ icmp6_reflect(struct mbuf *m, size_t off > sa6_dst.sin6_family = AF_INET6; > sa6_dst.sin6_len = sizeof(sa6_dst); > sa6_dst.sin6_addr = t; > + > +#ifdef MPLS > + if (mpls_label != -1) { > + struct sockaddr_mpls sa_mpls, *smpls; > + /* set ip6_src to something usable, based on the MPLS label */ > + bzero(&sa_mpls, sizeof(sa_mpls)); > + smpls = &sa_mpls; > + smpls->smpls_family = AF_MPLS; > + smpls->smpls_len = sizeof(*smpls); > + smpls->smpls_label = mpls_label; > > - /* > - * If the incoming packet was addressed directly to us (i.e. unicast), > - * use dst as the src for the reply. > - * The IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED case would be VERY rare, > - * but is possible (for example) when we encounter an error while > - * forwarding procedure destined to a duplicated address of ours. > - */ > - rt = rtalloc(sin6tosa(&sa6_dst), 0, rtableid); > - if (rtisvalid(rt) && ISSET(rt->rt_flags, RTF_LOCAL) && > - !ISSET(ifatoia6(rt->rt_ifa)->ia6_flags, > - IN6_IFF_ANYCAST|IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED)) { > - src = &t; > - } > - rtfree(rt); > - rt = NULL; > - > - if (src == NULL) { > - struct in6_ifaddr *ia6; > - > - /* > - * This case matches to multicasts, our anycast, or unicasts > - * that we do not own. Select a source address based on the > - * source address of the erroneous packet. > - */ > - rt = rtalloc(sin6tosa(&sa6_src), RT_RESOLVE, rtableid); > + rt = rtalloc(smplstosa(smpls), RT_RESOLVE, rtableid); > if (!rtisvalid(rt)) { > - char addr[INET6_ADDRSTRLEN]; > - > nd6log((LOG_DEBUG, > "%s: source can't be determined: dst=%s\n", > __func__, inet_ntop(AF_INET6, &sa6_src.sin6_addr, > - addr, sizeof(addr)))); > + addr, sizeof(addr)))); > rtfree(rt); > goto bad; > } > @@ -1149,6 +1155,48 @@ icmp6_reflect(struct mbuf *m, size_t off > if (src == NULL) > src = &ifatoia6(rt->rt_ifa)->ia_addr.sin6_addr; > } > +#endif > + if (src == NULL) { > + /* > + * If the incoming packet was addressed directly to us (i.e. > + * unicast), use dst as the src for the reply. The > + * IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED case would be VERY rare, > + * but is possible (for example) when we encounter an error > + * while forwarding procedure destined to a duplicated address > + * of ours. > + */ > + rt = rtalloc(sin6tosa(&sa6_dst), 0, rtableid); > + if (rtisvalid(rt) && ISSET(rt->rt_flags, RTF_LOCAL) && > + !ISSET(ifatoia6(rt->rt_ifa)->ia6_flags, > + IN6_IFF_ANYCAST|IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED)) { > + src = &t; > + } > + rtfree(rt); > + rt = NULL; > + > + if (src == NULL) { > + /* > + * This case matches to multicasts, our anycast, or > + * unicasts that we do not own. Select a source address > + * based on the source address of the erroneous packet. > + */ > + rt = rtalloc(sin6tosa(&sa6_src), RT_RESOLVE, rtableid); > + if (!rtisvalid(rt)) { > + nd6log((LOG_DEBUG, > + "%s: source can't be determined: dst=%s\n", > + __func__, inet_ntop(AF_INET6, > + &sa6_src.sin6_addr, addr, sizeof(addr)))); > + rtfree(rt); > + goto bad; > + } > + ia6 = in6_ifawithscope(rt->rt_ifa->ifa_ifp, &t, > + rtableid); > + if (ia6 != NULL) > + src = &ia6->ia_addr.sin6_addr; > + if (src == NULL) > + src = &ifatoia6(rt->rt_ifa)->ia_addr.sin6_addr; > + } > + } > > ip6->ip6_src = *src; > rtfree(rt); > @@ -1167,13 +1215,11 @@ icmp6_reflect(struct mbuf *m, size_t off > */ > > m->m_flags &= ~(M_BCAST|M_MCAST); > - > - ip6_send(m); > - return; > + return (0); > > bad: > m_freem(m); > - return; > + return (EHOSTUNREACH); > } > > void > Index: netmpls/mpls_input.c > =================================================================== > RCS file: /cvs/src/sys/netmpls/mpls_input.c,v > retrieving revision 1.68 > diff -u -p -r1.68 mpls_input.c > --- netmpls/mpls_input.c 12 Jan 2018 06:57:56 -0000 1.68 > +++ netmpls/mpls_input.c 18 Dec 2018 11:03:33 -0000 > @@ -36,6 +36,8 @@ > > #ifdef INET6 > #include <netinet/ip6.h> > +#include <netinet6/ip6_var.h> > +#include <netinet/icmp6.h> > #endif /* INET6 */ > > #include <netmpls/mpls.h> > @@ -200,8 +202,19 @@ do_v6: > #if NMPE > 0 > if (ifp->if_type == IFT_MPLS) { > smpls = satosmpls(rt_key(rt)); > - mpe_input(m, ifp, smpls, ttl); > - goto done; > + if (m->m_len < sizeof(u_char) && > + (m = m_pullup(m, sizeof(u_char))) == NULL) > + return; > + switch (*mtod(m, u_char *) >> 4) { > + case IPVERSION: > + mpe_input(m, ifp, smpls, ttl); > + goto done; > +#ifdef INET6 > + case IPV6_VERSION >> 4: > + mpe_input6(m, ifp, smpls, ttl); > + goto done; > +#endif > + } > } > #endif > if (ifp->if_type == IFT_MPLSTUNNEL) { > @@ -345,6 +358,9 @@ mpls_do_error(struct mbuf *m, int type, > struct icmp *icp; > struct ip *ip; > int nstk, error; > +#ifdef INET6 > + struct ip6_hdr *ip6; > +#endif > > for (nstk = 0; nstk < MPLS_INKERNEL_LOOP_MAX; nstk++) { > if (m->m_len < sizeof(*shim) && > @@ -419,6 +435,21 @@ mpls_do_error(struct mbuf *m, int type, > break; > #ifdef INET6 > case IPV6_VERSION >> 4: > + m = icmp6_do_error(m, ICMP6_TIME_EXCEEDED, > + ICMP6_TIME_EXCEED_TRANSIT, 0); > + if (m == NULL) > + return (NULL); > + > + error = icmp6_reflect(m, sizeof(struct ip6_hdr), > + shim->shim_label & MPLS_LABEL_MASK); > + if (error) > + return (NULL); > + > + ip6 = mtod(m, struct ip6_hdr *); > + ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); > + m->m_pkthdr.csum_flags |= M_ICMP_CSUM_OUT; > + in6_proto_cksum_out(m, NULL); > + break; > #endif > default: > m_freem(m); >