On 23/01/21(Sat) 12:22, Denis Fondras wrote: > Le Sat, Jan 09, 2021 at 06:50:50PM +0100, Denis Fondras a écrit : > > This diff place the user-set source address outside of struct art_root and > > make > > the code more readable (to me). > > > > Based on a concept by mpi@
Comments below. > > Index: net/rtsock.c > > =================================================================== > > RCS file: /cvs/src/sys/net/rtsock.c,v > > retrieving revision 1.304 > > diff -u -p -r1.304 rtsock.c > > --- net/rtsock.c 7 Nov 2020 09:51:40 -0000 1.304 > > +++ net/rtsock.c 9 Jan 2021 16:04:02 -0000 > > @@ -138,7 +138,8 @@ int sysctl_iflist(int, struct walkarg > > int sysctl_ifnames(struct walkarg *); > > int sysctl_rtable_rtstat(void *, size_t *, void *); > > > > -int rt_setsource(unsigned int, struct sockaddr *); > > +int rt_sourceset(struct rtentry *, unsigned int); > > +struct rtentry *rt_get_rt(int, unsigned int); I don't understand what's the use for rt_get_rt(), the name of the function isn't helping either, more on that below. > > /* > > * Locks used to protect struct members > > @@ -170,6 +171,14 @@ struct rtptable { > > struct pool rtpcb_pool; > > struct rtptable rtptable; > > > > +struct rt_srcaddr { > > + LIST_ENTRY(rt_srcaddr) rts_next; > > + unsigned int rts_rtableid; > > + struct rtentry *rts_rt; > > +}; > > + > > +LIST_HEAD(, rt_srcaddr) srcaddr_h = LIST_HEAD_INITIALIZER(srcaddr_h); > > + Could you document which lock are protecting those fields? Can you assert such lock is held when accessing them? Could you also document what this data structure is for? > > /* > > * These flags and timeout are used for indicating to userland (via a > > * RTM_DESYNC msg) when the route socket has overflowed and messages > > @@ -664,10 +673,7 @@ rtm_report(struct rtentry *rt, u_char ty > > ifp = if_get(rt->rt_ifidx); > > if (ifp != NULL) { > > info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); > > - info.rti_info[RTAX_IFA] = > > - rtable_getsource(tableid, > > info.rti_info[RTAX_DST]->sa_family); > > - if (info.rti_info[RTAX_IFA] == NULL) > > - info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; > > + info.rti_info[RTAX_IFA] = rt_get_ifa(rt, tableid)->ifa_addr; > > if (ifp->if_flags & IFF_POINTOPOINT) With the introduction of rt_get_ifa() is there any place left in the network stack where `rt->rt_ifa' is accessed directly? Is there a reason? Should we explain when using the function is necessary in a comment on top of it? > > info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; > > } > > @@ -860,10 +866,28 @@ route_output(struct mbuf *m, struct sock > > if (info.rti_info[RTAX_IFA] == NULL) { > > error = EINVAL; > > goto fail; > > + } else if ((info.rti_info[RTAX_IFA]->sa_family == AF_INET6 && > > + IN6_IS_ADDR_UNSPECIFIED(&((struct sockaddr_in6 *) > > + info.rti_info[RTAX_IFA])->sin6_addr)) || > > + (info.rti_info[RTAX_IFA]->sa_family == AF_INET && > > + ((struct sockaddr_in *) > > + info.rti_info[RTAX_IFA])->sin_addr.s_addr == 0)) { Do I understand correctly that the default route is used as a magic value to clear any preferred source routing entry? Isn't it possible to retrieve this route via rtalloc(9) instead of rt_get_rt()? Why do we need a `rt' at all? If it's because rt_sourceclear() expects one, can we change this expectation? > > + > > rt_sourceclear(rt_get_rt(info.rti_info[RTAX_IFA]->sa_family, > > + tableid), tableid); > > + rtfree(rt); > > + rt = NULL; > > + } else { > > + rt = rtalloc(info.rti_info[RTAX_IFA], 0, tableid); > > + if (rt == NULL || !ISSET(rt->rt_flags, RTF_LOCAL)) { > > + error = EINVAL; > > + goto fail; > > + } > > + NET_LOCK(); > > + error = rt_sourceset(rt, tableid); > > + NET_UNLOCK(); Could you push the NET_LOCK() down and do the allocation before grabbing it? We should refrain from calling malloc(9) with such lock held. Maybe the NET_LOCK() is not the lock we want here, but this can be changed later. > > + if (error != 0) > > + goto fail; > > } > > - if ((error = > > - rt_setsource(tableid, info.rti_info[RTAX_IFA])) != 0) > > - goto fail; > > } else { > > error = rtm_output(rtm, &rt, &info, prio, tableid); > > if (!error) { > > @@ -873,9 +897,9 @@ route_output(struct mbuf *m, struct sock > > rtm = rtm_report(rt, type, seq, tableid); > > len = rtm->rtm_msglen; > > } > > + rtfree(rt); > > } > > > > - rtfree(rt); > > if (error) { > > rtm->rtm_errno = error; > > } else { > > @@ -1687,10 +1711,7 @@ rtm_send(struct rtentry *rt, int cmd, in > > ifp = if_get(rt->rt_ifidx); > > if (ifp != NULL) { > > info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); > > - info.rti_info[RTAX_IFA] = > > - rtable_getsource(rtableid, > > info.rti_info[RTAX_DST]->sa_family); > > - if (info.rti_info[RTAX_IFA] == NULL) > > - info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; > > + info.rti_info[RTAX_IFA] = rt_get_ifa(rt, rtableid)->ifa_addr; > > } > > > > rtm_miss(cmd, &info, rt->rt_flags, rt->rt_priority, rt->rt_ifidx, error, > > @@ -1928,10 +1949,7 @@ sysctl_dumpentry(struct rtentry *rt, voi > > ifp = if_get(rt->rt_ifidx); > > if (ifp != NULL) { > > info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); > > - info.rti_info[RTAX_IFA] = > > - rtable_getsource(id, info.rti_info[RTAX_DST]->sa_family); > > - if (info.rti_info[RTAX_IFA] == NULL) > > - info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; > > + info.rti_info[RTAX_IFA] = rt_get_ifa(rt, id)->ifa_addr; > > if (ifp->if_flags & IFF_POINTOPOINT) > > info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; > > } > > @@ -2067,33 +2085,30 @@ sysctl_ifnames(struct walkarg *w) > > } > > > > int > > -sysctl_source(int af, u_int tableid, struct walkarg *w) > > +sysctl_source(int af, struct walkarg *w) > > { > > - struct sockaddr *sa; > > - int size, error = 0; > > + struct rt_srcaddr *rtsa = NULL; > > + unsigned int tableid = w->w_arg; > > + int error = 0; > > > > - sa = rtable_getsource(tableid, af); > > - if (sa) { > > - switch (sa->sa_family) { > > - case AF_INET: > > - size = sizeof(struct sockaddr_in); > > - break; > > -#ifdef INET6 > > - case AF_INET6: > > - size = sizeof(struct sockaddr_in6); > > - break; > > -#endif > > - default: > > - return (0); > > - } > > - w->w_needed += size; > > + LIST_FOREACH(rtsa, &srcaddr_h, rts_next) { > > + if (rtsa->rts_rtableid != tableid) > > + continue; > > + if (af != 0 && rtsa->rts_rt->rt_dest->sa_family != af) > > + continue; > > + > > + w->w_needed += rtsa->rts_rt->rt_dest->sa_len; > > if (w->w_where && w->w_needed <= 0) { > > - if ((error = copyout(sa, w->w_where, size))) > > - return (error); > > - w->w_where += size; > > + error = copyout(rtsa->rts_rt->rt_ifa->ifa_addr, > > + w->w_where, rtsa->rts_rt->rt_dest->sa_len); > > + if (error == EAFNOSUPPORT) > > + error = 0; > > + if (error) > > + break; > > + w->w_where += rtsa->rts_rt->rt_dest->sa_len; > > } > > } > > - return (0); > > + return (error); > > } > > > > int > > @@ -2171,16 +2186,7 @@ sysctl_rtable(int *name, u_int namelen, > > if (!rtable_exists(tableid)) > > return (ENOENT); > > NET_LOCK(); > > - for (i = 1; i <= AF_MAX; i++) { > > - if (af != 0 && af != i) > > - continue; > > - > > - error = sysctl_source(i, tableid, &w); > > - if (error == EAFNOSUPPORT) > > - error = 0; > > - if (error) > > - break; > > - } > > + error = sysctl_source(af, &w); > > NET_UNLOCK(); Same here could you push the lock inside the function? > > break; > > } > > @@ -2314,40 +2320,96 @@ rtm_validate_proposal(struct rt_addrinfo > > } > > > > int > > -rt_setsource(unsigned int rtableid, struct sockaddr *src) > > +rt_sourceset(struct rtentry *rt, unsigned int rtableid) > > { > > - struct ifaddr *ifa; > > - /* > > - * If source address is 0.0.0.0 or :: > > - * use automatic source selection > > - */ > > - switch(src->sa_family) { > > - case AF_INET: > > - if(satosin(src)->sin_addr.s_addr == INADDR_ANY) { > > - rtable_setsource(rtableid, AF_INET, NULL); > > - return (0); > > - } > > - break; > > -#ifdef INET6 > > - case AF_INET6: > > - if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)) { > > - rtable_setsource(rtableid, AF_INET6, NULL); > > - return (0); > > + struct rt_srcaddr *rtsa = NULL; > > + > > + LIST_FOREACH(rtsa, &srcaddr_h, rts_next) { > > + if (rtsa->rts_rtableid == rtableid && > > + rtsa->rts_rt->rt_dest->sa_family == rt->rt_dest->sa_family) > > + break; > > + } > > + if (rtsa == NULL) { > > + if ((rtsa = malloc(sizeof(struct rt_srcaddr), M_IFADDR, > > + M_NOWAIT|M_ZERO)) == NULL) > > + return (ENOMEM); > > + rtsa->rts_rtableid = rtableid; > > + rtsa->rts_rt = rt; > > + LIST_INSERT_HEAD(&srcaddr_h, rtsa, rts_next); > > + } else { > > + /* Update existing entry */ > > + rtfree(rtsa->rts_rt); > > + rtsa->rts_rt = rt; > > + } > > + > > + return (0); > > +} > > + > > +/* > > + * Return the 'ifa' associated to a given 'rt' unless a preferred > > + * source address has been specified for the same routing table. > > + */ > > +struct rtentry * > > +rt_get_rt(int af, unsigned int rtableid) > > +{ > > + struct rt_srcaddr *rtsa = NULL; > > + > > + LIST_FOREACH(rtsa, &srcaddr_h, rts_next) { > > + if (rtsa->rts_rtableid == rtableid && > > + rtsa->rts_rt->rt_dest->sa_family == af) > > + break; > > + } > > + if (rtsa) > > + return (rtsa->rts_rt); > > + > > + return (NULL); > > +} Here we should document that it is safe to dereference `ifa' as long as the caller owns a valid reference to `rt'. > > +struct ifaddr * > > +rt_get_ifa(struct rtentry *rt, unsigned int rtableid) > > +{ > > + struct rt_srcaddr *rtsa = NULL; > > + struct ifnet *ifp = NULL; > > + > > + if (ISSET(rt->rt_flags, RTF_HOST|RTF_LLINFO)) > > + return (rt->rt_ifa); > > + Please add an assert for the lock here, just before iterating on a data structure that requires it. > > + LIST_FOREACH(rtsa, &srcaddr_h, rts_next) { > > + if (rtsa->rts_rtableid == rtableid && > > + rtsa->rts_rt->rt_dest->sa_family == rt->rt_dest->sa_family) > > + break; > > + } > > + if (rtsa != NULL && rtisvalid(rtsa->rts_rt)) { > > + struct rtentry *nrt = rtsa->rts_rt; > > + > > + ifp = if_get(nrt->rt_ifidx); > > + if (ifp != NULL) { > > + if (ISSET(ifp->if_flags, IFF_UP)) { > > + if_put(ifp); > > + return (nrt->rt_ifa); > > + } I don't understand this check. The `ifp' can be brought down at anytime. This seems like a workaround for something and should be at least documented. > > } > > - break; > > -#endif > > - default: > > - return (EAFNOSUPPORT); > > } > > + return (rt->rt_ifa); > > +} > > > > - /* > > - * Check if source address is assigned to an interface in the > > - * same rdomain > > - */ > > - if ((ifa = ifa_ifwithaddr(src, rtableid)) == NULL) > > - return (EINVAL); > > +void > > +rt_sourceclear(struct rtentry *rt, unsigned int rtableid) > > +{ > > + struct rt_srcaddr *rtsa = NULL; > > > > - return (rtable_setsource(rtableid, src->sa_family, ifa->ifa_addr)); > > + if (rt == NULL) > > + return; > > + > > + LIST_FOREACH(rtsa, &srcaddr_h, rts_next) { > > + if (rtsa->rts_rtableid != rtableid) > > + continue; > > + if (rtsa->rts_rt == rt) { > > + LIST_REMOVE(rtsa, rts_next); I'd suggest using the 'break' idiom and not calling a LIST_REMOVE() inside a list which isn't a *_SAFE() variant, that is IMHO less confusing. > > + free(rtsa, M_IFADDR, sizeof(struct rt_srcaddr)); > > + break; > > + } > > + } > > } > > > > /* > > Index: netinet/in_pcb.c > > =================================================================== > > RCS file: /cvs/src/sys/netinet/in_pcb.c,v > > retrieving revision 1.252 > > diff -u -p -r1.252 in_pcb.c > > --- netinet/in_pcb.c 7 Nov 2020 09:51:40 -0000 1.252 > > +++ netinet/in_pcb.c 9 Jan 2021 16:04:02 -0000 > > @@ -887,7 +887,6 @@ in_pcbselsrc(struct in_addr **insrc, str > > struct route *ro = &inp->inp_route; > > struct in_addr *laddr = &inp->inp_laddr; > > u_int rtableid = inp->inp_rtableid; > > - struct sockaddr *ip4_source = NULL; > > > > struct sockaddr_in *sin2; > > struct in_ifaddr *ia = NULL; > > @@ -951,30 +950,11 @@ in_pcbselsrc(struct in_addr **insrc, str > > } > > > > /* > > - * If we found a route, use the address > > - * corresponding to the outgoing interface. > > + * If we found a route, use the address corresponding to the > > + * outgoing interface or the preferred source address if set. > > */ > > if (ro->ro_rt != NULL) > > - ia = ifatoia(ro->ro_rt->rt_ifa); > > - > > - /* > > - * Use preferred source address if : > > - * - destination is not onlink > > - * - preferred source addresss is set > > - * - output interface is UP > > - */ > > - if (ro->ro_rt && !(ro->ro_rt->rt_flags & RTF_LLINFO) && > > - !(ro->ro_rt->rt_flags & RTF_HOST)) { > > - ip4_source = rtable_getsource(rtableid, AF_INET); > > - if (ip4_source != NULL) { > > - struct ifaddr *ifa; > > - if ((ifa = ifa_ifwithaddr(ip4_source, rtableid)) != > > - NULL && ISSET(ifa->ifa_ifp->if_flags, IFF_UP)) { > > - *insrc = &satosin(ip4_source)->sin_addr; > > - return (0); > > - } > > - } > > - } > > + ia = ifatoia(rt_get_ifa(ro->ro_rt, rtableid)); > > > > if (ia == NULL) > > return (EADDRNOTAVAIL); > > Index: netinet/ip_icmp.c > > =================================================================== > > RCS file: /cvs/src/sys/netinet/ip_icmp.c,v > > retrieving revision 1.184 > > diff -u -p -r1.184 ip_icmp.c > > --- netinet/ip_icmp.c 20 Dec 2020 21:15:47 -0000 1.184 > > +++ netinet/ip_icmp.c 9 Jan 2021 16:04:02 -0000 > > @@ -745,7 +745,7 @@ icmp_reflect(struct mbuf *m, struct mbuf > > return (EHOSTUNREACH); > > } > > > > - ia = ifatoia(rt->rt_ifa); > > + ia = ifatoia(rt_get_ifa(rt, rtableid)); > > } > > > > ip->ip_dst = ip->ip_src; > > Index: netinet6/icmp6.c > > =================================================================== > > RCS file: /cvs/src/sys/netinet6/icmp6.c,v > > retrieving revision 1.233 > > diff -u -p -r1.233 icmp6.c > > --- netinet6/icmp6.c 28 Oct 2020 17:27:35 -0000 1.233 > > +++ netinet6/icmp6.c 9 Jan 2021 16:04:02 -0000 > > @@ -1163,7 +1163,10 @@ icmp6_reflect(struct mbuf **mp, size_t o > > rtfree(rt); > > goto bad; > > } > > - ia6 = in6_ifawithscope(rt->rt_ifa->ifa_ifp, &t, rtableid); > > + ia6 = ifatoia6(rt_get_ifa(rt, rtableid)); > > + if (ia6 == NULL) > > + ia6 = in6_ifawithscope(rt->rt_ifa->ifa_ifp, &t, > > + rtableid); > > if (ia6 != NULL) > > src = &ia6->ia_addr.sin6_addr; > > if (src == NULL) > > Index: netinet6/in6_src.c > > =================================================================== > > RCS file: /cvs/src/sys/netinet6/in6_src.c,v > > retrieving revision 1.84 > > diff -u -p -r1.84 in6_src.c > > --- netinet6/in6_src.c 7 Nov 2020 09:51:40 -0000 1.84 > > +++ netinet6/in6_src.c 9 Jan 2021 16:04:02 -0000 > > @@ -100,7 +100,6 @@ in6_pcbselsrc(struct in6_addr **in6src, > > struct in6_addr *laddr = &inp->inp_laddr6; > > u_int rtableid = inp->inp_rtableid; > > struct ifnet *ifp = NULL; > > - struct sockaddr *ip6_source = NULL; > > struct in6_addr *dst; > > struct in6_ifaddr *ia6 = NULL; > > struct in6_pktinfo *pi = NULL; > > @@ -208,32 +207,16 @@ in6_pcbselsrc(struct in6_addr **in6src, > > */ > > > > if (ro->ro_rt) { > > - ifp = if_get(ro->ro_rt->rt_ifidx); > > - if (ifp != NULL) { > > - ia6 = in6_ifawithscope(ifp, dst, rtableid); > > - if_put(ifp); > > + ia6 = ifatoia6(rt_get_ifa(ro->ro_rt, rtableid)); > > + if (ia6 == NULL) { > > + ifp = if_get(ro->ro_rt->rt_ifidx); > > + if (ifp != NULL) { > > + ia6 = in6_ifawithscope(ifp, dst, rtableid); > > + if_put(ifp); > > + } > > } > > if (ia6 == NULL) /* xxx scope error ?*/ > > ia6 = ifatoia6(ro->ro_rt->rt_ifa); ^^^^^^^^^^^^^^^^^ Shouldn't rt_get_ifa() be used here as well? Shouldn't the scope be checked first? I'd love to see this code unified with the logic in icmp6_reflect() bot are different currently. > > - } > > - > > - /* > > - * Use preferred source address if : > > - * - destination is not onlink > > - * - preferred source addresss is set > > - * - output interface is UP > > - */ > > - if (ro->ro_rt && !(ro->ro_rt->rt_flags & RTF_LLINFO) && > > - !(ro->ro_rt->rt_flags & RTF_HOST)) { > > - ip6_source = rtable_getsource(rtableid, AF_INET6); > > - if (ip6_source != NULL) { > > - struct ifaddr *ifa; > > - if ((ifa = ifa_ifwithaddr(ip6_source, rtableid)) != > > - NULL && ISSET(ifa->ifa_ifp->if_flags, IFF_UP)) { > > - *in6src = &satosin6(ip6_source)->sin6_addr; > > - return (0); > > - } > > - } > > } > > > > if (ia6 == NULL)