On 23/01/21(Sat) 12:22, Denis Fondras wrote:
> Le Sat, Jan 09, 2021 at 06:50:50PM +0100, Denis Fondras a écrit :
> > This diff place the user-set source address outside of struct art_root and 
> > make
> > the code more readable (to me).
> > 
> > Based on a concept by mpi@

Comments below.

> > Index: net/rtsock.c
> > ===================================================================
> > RCS file: /cvs/src/sys/net/rtsock.c,v
> > retrieving revision 1.304
> > diff -u -p -r1.304 rtsock.c
> > --- net/rtsock.c    7 Nov 2020 09:51:40 -0000       1.304
> > +++ net/rtsock.c    9 Jan 2021 16:04:02 -0000
> > @@ -138,7 +138,8 @@ int              sysctl_iflist(int, struct walkarg 
> >  int                 sysctl_ifnames(struct walkarg *);
> >  int                 sysctl_rtable_rtstat(void *, size_t *, void *);
> >  
> > -int                 rt_setsource(unsigned int, struct sockaddr *);
> > +int                 rt_sourceset(struct rtentry *, unsigned int);
> > +struct rtentry     *rt_get_rt(int, unsigned int);

I don't understand what's the use for rt_get_rt(), the name of the
function isn't helping either, more on that below.

> >  /*
> >   * Locks used to protect struct members
> > @@ -170,6 +171,14 @@ struct rtptable {
> >  struct pool rtpcb_pool;
> >  struct rtptable rtptable;
> >  
> > +struct rt_srcaddr {
> > +   LIST_ENTRY(rt_srcaddr)   rts_next;
> > +   unsigned int             rts_rtableid;
> > +   struct rtentry          *rts_rt;
> > +};
> > +
> > +LIST_HEAD(, rt_srcaddr)    srcaddr_h = LIST_HEAD_INITIALIZER(srcaddr_h);
> > +

Could you document which lock are protecting those fields?  Can you
assert such lock is held when accessing them?  Could you also document
what this data structure is for?

> >  /*
> >   * These flags and timeout are used for indicating to userland (via a
> >   * RTM_DESYNC msg) when the route socket has overflowed and messages
> > @@ -664,10 +673,7 @@ rtm_report(struct rtentry *rt, u_char ty
> >     ifp = if_get(rt->rt_ifidx);
> >     if (ifp != NULL) {
> >             info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
> > -           info.rti_info[RTAX_IFA] =
> > -               rtable_getsource(tableid, 
> > info.rti_info[RTAX_DST]->sa_family);
> > -           if (info.rti_info[RTAX_IFA] == NULL)
> > -                   info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
> > +           info.rti_info[RTAX_IFA] = rt_get_ifa(rt, tableid)->ifa_addr;
> >             if (ifp->if_flags & IFF_POINTOPOINT)

With the introduction of rt_get_ifa() is there any place left in the
network stack where `rt->rt_ifa' is accessed directly?  Is there a
reason?  Should we explain when using the function is necessary in a
comment on top of it?

> >                     info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
> >     }
> > @@ -860,10 +866,28 @@ route_output(struct mbuf *m, struct sock
> >             if (info.rti_info[RTAX_IFA] == NULL) {
> >                     error = EINVAL;
> >                     goto fail;
> > +           } else if ((info.rti_info[RTAX_IFA]->sa_family == AF_INET6 &&
> > +               IN6_IS_ADDR_UNSPECIFIED(&((struct sockaddr_in6 *)
> > +               info.rti_info[RTAX_IFA])->sin6_addr)) ||
> > +               (info.rti_info[RTAX_IFA]->sa_family == AF_INET &&
> > +               ((struct sockaddr_in *)
> > +               info.rti_info[RTAX_IFA])->sin_addr.s_addr == 0)) {

Do I understand correctly that the default route is used as a magic
value to clear any preferred source routing entry?  Isn't it possible to
retrieve this route via rtalloc(9) instead of rt_get_rt()?

Why do we need a `rt' at all?  If it's because rt_sourceclear() expects
one, can we change this expectation?

> > +                   
> > rt_sourceclear(rt_get_rt(info.rti_info[RTAX_IFA]->sa_family,
> > +                       tableid), tableid);
> > +                   rtfree(rt);
> > +                   rt = NULL;
> > +           } else {
> > +                   rt = rtalloc(info.rti_info[RTAX_IFA], 0, tableid);
> > +                   if (rt == NULL || !ISSET(rt->rt_flags, RTF_LOCAL)) {
> > +                           error = EINVAL;
> > +                           goto fail;
> > +                   }
> > +                   NET_LOCK();
> > +                   error = rt_sourceset(rt, tableid);
> > +                   NET_UNLOCK();

Could you push the NET_LOCK() down and do the allocation before grabbing
it?  We should refrain from calling malloc(9) with such lock held.

Maybe the NET_LOCK() is not the lock we want here, but this can be
changed later.

> > +                   if (error != 0)
> > +                           goto fail;
> >             }
> > -           if ((error =
> > -               rt_setsource(tableid, info.rti_info[RTAX_IFA])) != 0)
> > -                   goto fail;
> >     } else {
> >             error = rtm_output(rtm, &rt, &info, prio, tableid);
> >             if (!error) {
> > @@ -873,9 +897,9 @@ route_output(struct mbuf *m, struct sock
> >                     rtm = rtm_report(rt, type, seq, tableid);
> >                     len = rtm->rtm_msglen;
> >             }
> > +           rtfree(rt);
> >     }
> >  
> > -   rtfree(rt);
> >     if (error) {
> >             rtm->rtm_errno = error;
> >     } else {
> > @@ -1687,10 +1711,7 @@ rtm_send(struct rtentry *rt, int cmd, in
> >     ifp = if_get(rt->rt_ifidx);
> >     if (ifp != NULL) {
> >             info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
> > -           info.rti_info[RTAX_IFA] =
> > -               rtable_getsource(rtableid, 
> > info.rti_info[RTAX_DST]->sa_family);
> > -           if (info.rti_info[RTAX_IFA] == NULL)
> > -                   info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
> > +           info.rti_info[RTAX_IFA] = rt_get_ifa(rt, rtableid)->ifa_addr;
> >     }
> >  
> >     rtm_miss(cmd, &info, rt->rt_flags, rt->rt_priority, rt->rt_ifidx, error,
> > @@ -1928,10 +1949,7 @@ sysctl_dumpentry(struct rtentry *rt, voi
> >     ifp = if_get(rt->rt_ifidx);
> >     if (ifp != NULL) {
> >             info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
> > -           info.rti_info[RTAX_IFA] =
> > -               rtable_getsource(id, info.rti_info[RTAX_DST]->sa_family);
> > -           if (info.rti_info[RTAX_IFA] == NULL)
> > -                   info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
> > +           info.rti_info[RTAX_IFA] = rt_get_ifa(rt, id)->ifa_addr;
> >             if (ifp->if_flags & IFF_POINTOPOINT)
> >                     info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
> >     }
> > @@ -2067,33 +2085,30 @@ sysctl_ifnames(struct walkarg *w)
> >  }
> >  
> >  int
> > -sysctl_source(int af, u_int tableid, struct walkarg *w)
> > +sysctl_source(int af, struct walkarg *w)
> >  {
> > -   struct sockaddr *sa;
> > -   int              size, error = 0;
> > +   struct rt_srcaddr       *rtsa = NULL;
> > +   unsigned int             tableid = w->w_arg;
> > +   int                      error = 0;
> >  
> > -   sa = rtable_getsource(tableid, af);
> > -   if (sa) {
> > -           switch (sa->sa_family) {
> > -           case AF_INET:
> > -                   size = sizeof(struct sockaddr_in);
> > -                   break;
> > -#ifdef INET6
> > -           case AF_INET6:
> > -                   size = sizeof(struct sockaddr_in6);
> > -                   break;
> > -#endif
> > -           default:
> > -                   return (0);
> > -           }
> > -           w->w_needed += size;
> > +   LIST_FOREACH(rtsa, &srcaddr_h, rts_next) {
> > +           if (rtsa->rts_rtableid != tableid)
> > +                   continue;
> > +           if (af != 0 && rtsa->rts_rt->rt_dest->sa_family != af)
> > +                   continue;
> > +
> > +           w->w_needed += rtsa->rts_rt->rt_dest->sa_len;
> >             if (w->w_where && w->w_needed <= 0) {
> > -                   if ((error = copyout(sa, w->w_where, size)))
> > -                           return (error);
> > -                   w->w_where += size;
> > +                   error = copyout(rtsa->rts_rt->rt_ifa->ifa_addr,
> > +                       w->w_where, rtsa->rts_rt->rt_dest->sa_len);
> > +                   if (error == EAFNOSUPPORT)
> > +                           error = 0;
> > +                   if (error)
> > +                           break;
> > +                   w->w_where += rtsa->rts_rt->rt_dest->sa_len;
> >             }
> >     }
> > -   return (0);
> > +   return (error);
> >  }
> >  
> >  int
> > @@ -2171,16 +2186,7 @@ sysctl_rtable(int *name, u_int namelen, 
> >             if (!rtable_exists(tableid))
> >                     return (ENOENT);
> >             NET_LOCK();
> > -           for (i = 1; i <= AF_MAX; i++) {
> > -                   if (af != 0 && af != i)
> > -                           continue;
> > -
> > -                   error = sysctl_source(i, tableid, &w);
> > -                   if (error == EAFNOSUPPORT)
> > -                           error = 0;
> > -                   if (error)
> > -                           break;
> > -           }
> > +           error = sysctl_source(af, &w);
> >             NET_UNLOCK();

Same here could you push the lock inside the function?

> >             break;
> >     }
> > @@ -2314,40 +2320,96 @@ rtm_validate_proposal(struct rt_addrinfo
> >  }
> >  
> >  int
> > -rt_setsource(unsigned int rtableid, struct sockaddr *src)
> > +rt_sourceset(struct rtentry *rt, unsigned int rtableid)
> >  {
> > -   struct ifaddr   *ifa;
> > -   /*
> > -    * If source address is 0.0.0.0 or ::
> > -    * use automatic source selection
> > -    */
> > -   switch(src->sa_family) {
> > -   case AF_INET:
> > -           if(satosin(src)->sin_addr.s_addr == INADDR_ANY) {
> > -                   rtable_setsource(rtableid, AF_INET, NULL);
> > -                   return (0);
> > -           }
> > -           break;
> > -#ifdef INET6
> > -   case AF_INET6:
> > -           if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)) {
> > -                   rtable_setsource(rtableid, AF_INET6, NULL);
> > -                   return (0);
> > +   struct rt_srcaddr       *rtsa = NULL;
> > +
> > +   LIST_FOREACH(rtsa, &srcaddr_h, rts_next) {
> > +           if (rtsa->rts_rtableid == rtableid &&
> > +               rtsa->rts_rt->rt_dest->sa_family == rt->rt_dest->sa_family)
> > +                   break;
> > +   }
> > +   if (rtsa == NULL) {
> > +           if ((rtsa = malloc(sizeof(struct rt_srcaddr), M_IFADDR,
> > +               M_NOWAIT|M_ZERO)) == NULL)
> > +                   return (ENOMEM);
> > +           rtsa->rts_rtableid = rtableid;
> > +           rtsa->rts_rt = rt;
> > +           LIST_INSERT_HEAD(&srcaddr_h, rtsa, rts_next);
> > +   } else {
> > +           /* Update existing entry */
> > +           rtfree(rtsa->rts_rt);
> > +           rtsa->rts_rt = rt;
> > +   }
> > +
> > +   return (0);
> > +}
> > +
> > +/*
> > + * Return the 'ifa' associated to a given 'rt' unless a preferred
> > + * source address has been specified for the same routing table.
> > + */
> > +struct rtentry *
> > +rt_get_rt(int af, unsigned int rtableid)
> > +{
> > +   struct rt_srcaddr       *rtsa = NULL;
> > +
> > +   LIST_FOREACH(rtsa, &srcaddr_h, rts_next) {
> > +           if (rtsa->rts_rtableid == rtableid &&
> > +               rtsa->rts_rt->rt_dest->sa_family == af)
> > +                   break;
> > +   }
> > +   if (rtsa)
> > +           return (rtsa->rts_rt);
> > +
> > +   return (NULL);
> > +}

Here we should document that it is safe to dereference `ifa' as long as
the caller owns a valid reference to `rt'.

> > +struct ifaddr *
> > +rt_get_ifa(struct rtentry *rt, unsigned int rtableid)
> > +{
> > +   struct rt_srcaddr       *rtsa = NULL;
> > +   struct ifnet            *ifp = NULL;
> > +
> > +   if (ISSET(rt->rt_flags, RTF_HOST|RTF_LLINFO))
> > +           return (rt->rt_ifa);
> > +

Please add an assert for the lock here, just before iterating on a data 
structure
that requires it.

> > +   LIST_FOREACH(rtsa, &srcaddr_h, rts_next) {
> > +           if (rtsa->rts_rtableid == rtableid &&
> > +               rtsa->rts_rt->rt_dest->sa_family == rt->rt_dest->sa_family)
> > +                   break;
> > +   }
> > +   if (rtsa != NULL && rtisvalid(rtsa->rts_rt)) {
> > +           struct rtentry  *nrt = rtsa->rts_rt;
> > +
> > +           ifp = if_get(nrt->rt_ifidx);
> > +           if (ifp != NULL) {
> > +                   if (ISSET(ifp->if_flags, IFF_UP)) {
> > +                           if_put(ifp);
> > +                           return (nrt->rt_ifa);
> > +                   }

I don't understand this check.  The `ifp' can be brought down at
anytime.  This seems like a workaround for something and should be at
least documented.

> >             }
> > -           break;
> > -#endif
> > -   default:
> > -           return (EAFNOSUPPORT);
> >     }
> > +   return (rt->rt_ifa);
> > +}
> >  
> > -   /*
> > -    * Check if source address is assigned to an interface in the
> > -    * same rdomain
> > -    */
> > -   if ((ifa = ifa_ifwithaddr(src, rtableid)) == NULL)
> > -           return (EINVAL);
> > +void
> > +rt_sourceclear(struct rtentry *rt, unsigned int rtableid)
> > +{
> > +   struct rt_srcaddr       *rtsa = NULL;
> >  
> > -   return (rtable_setsource(rtableid, src->sa_family, ifa->ifa_addr));
> > +   if (rt == NULL)
> > +           return;
> > +
> > +   LIST_FOREACH(rtsa, &srcaddr_h, rts_next) {
> > +           if (rtsa->rts_rtableid != rtableid)
> > +                   continue;
> > +           if (rtsa->rts_rt == rt) {
> > +                   LIST_REMOVE(rtsa, rts_next);

I'd suggest using the 'break' idiom and not calling a LIST_REMOVE()
inside a list which isn't a *_SAFE() variant, that is IMHO less
confusing.

> > +                   free(rtsa, M_IFADDR, sizeof(struct rt_srcaddr));
> > +                   break;
> > +           }
> > +   }
> >  }
> >  
> >  /*
> > Index: netinet/in_pcb.c
> > ===================================================================
> > RCS file: /cvs/src/sys/netinet/in_pcb.c,v
> > retrieving revision 1.252
> > diff -u -p -r1.252 in_pcb.c
> > --- netinet/in_pcb.c        7 Nov 2020 09:51:40 -0000       1.252
> > +++ netinet/in_pcb.c        9 Jan 2021 16:04:02 -0000
> > @@ -887,7 +887,6 @@ in_pcbselsrc(struct in_addr **insrc, str
> >     struct route *ro = &inp->inp_route;
> >     struct in_addr *laddr = &inp->inp_laddr;
> >     u_int rtableid = inp->inp_rtableid;
> > -   struct sockaddr *ip4_source = NULL;
> >  
> >     struct sockaddr_in *sin2;
> >     struct in_ifaddr *ia = NULL;
> > @@ -951,30 +950,11 @@ in_pcbselsrc(struct in_addr **insrc, str
> >     }
> >  
> >     /*
> > -    * If we found a route, use the address
> > -    * corresponding to the outgoing interface.
> > +    * If we found a route, use the address corresponding to the
> > +    * outgoing interface or the preferred source address if set.
> >      */
> >     if (ro->ro_rt != NULL)
> > -           ia = ifatoia(ro->ro_rt->rt_ifa);
> > -
> > -   /*
> > -    * Use preferred source address if :
> > -    * - destination is not onlink
> > -    * - preferred source addresss is set
> > -    * - output interface is UP
> > -    */
> > -   if (ro->ro_rt && !(ro->ro_rt->rt_flags & RTF_LLINFO) &&
> > -       !(ro->ro_rt->rt_flags & RTF_HOST)) {
> > -           ip4_source = rtable_getsource(rtableid, AF_INET);
> > -           if (ip4_source != NULL) {
> > -                   struct ifaddr *ifa;
> > -                   if ((ifa = ifa_ifwithaddr(ip4_source, rtableid)) !=
> > -                       NULL && ISSET(ifa->ifa_ifp->if_flags, IFF_UP)) {
> > -                           *insrc = &satosin(ip4_source)->sin_addr;
> > -                           return (0);
> > -                   }
> > -           }
> > -   }
> > +           ia = ifatoia(rt_get_ifa(ro->ro_rt, rtableid));
> >  
> >     if (ia == NULL)
> >             return (EADDRNOTAVAIL);
> > Index: netinet/ip_icmp.c
> > ===================================================================
> > RCS file: /cvs/src/sys/netinet/ip_icmp.c,v
> > retrieving revision 1.184
> > diff -u -p -r1.184 ip_icmp.c
> > --- netinet/ip_icmp.c       20 Dec 2020 21:15:47 -0000      1.184
> > +++ netinet/ip_icmp.c       9 Jan 2021 16:04:02 -0000
> > @@ -745,7 +745,7 @@ icmp_reflect(struct mbuf *m, struct mbuf
> >                     return (EHOSTUNREACH);
> >             }
> >  
> > -           ia = ifatoia(rt->rt_ifa);
> > +           ia = ifatoia(rt_get_ifa(rt, rtableid));
> >     }
> >  
> >     ip->ip_dst = ip->ip_src;
> > Index: netinet6/icmp6.c
> > ===================================================================
> > RCS file: /cvs/src/sys/netinet6/icmp6.c,v
> > retrieving revision 1.233
> > diff -u -p -r1.233 icmp6.c
> > --- netinet6/icmp6.c        28 Oct 2020 17:27:35 -0000      1.233
> > +++ netinet6/icmp6.c        9 Jan 2021 16:04:02 -0000
> > @@ -1163,7 +1163,10 @@ icmp6_reflect(struct mbuf **mp, size_t o
> >                     rtfree(rt);
> >                     goto bad;
> >             }
> > -           ia6 = in6_ifawithscope(rt->rt_ifa->ifa_ifp, &t, rtableid);
> > +           ia6 = ifatoia6(rt_get_ifa(rt, rtableid));
> > +           if (ia6 == NULL)
> > +                   ia6 = in6_ifawithscope(rt->rt_ifa->ifa_ifp, &t,
> > +                       rtableid);
> >             if (ia6 != NULL)
> >                     src = &ia6->ia_addr.sin6_addr;
> >             if (src == NULL)
> > Index: netinet6/in6_src.c
> > ===================================================================
> > RCS file: /cvs/src/sys/netinet6/in6_src.c,v
> > retrieving revision 1.84
> > diff -u -p -r1.84 in6_src.c
> > --- netinet6/in6_src.c      7 Nov 2020 09:51:40 -0000       1.84
> > +++ netinet6/in6_src.c      9 Jan 2021 16:04:02 -0000
> > @@ -100,7 +100,6 @@ in6_pcbselsrc(struct in6_addr **in6src, 
> >     struct in6_addr *laddr = &inp->inp_laddr6;
> >     u_int rtableid = inp->inp_rtableid;
> >     struct ifnet *ifp = NULL;
> > -   struct sockaddr *ip6_source = NULL;
> >     struct in6_addr *dst;
> >     struct in6_ifaddr *ia6 = NULL;
> >     struct in6_pktinfo *pi = NULL;
> > @@ -208,32 +207,16 @@ in6_pcbselsrc(struct in6_addr **in6src, 
> >      */
> >  
> >     if (ro->ro_rt) {
> > -           ifp = if_get(ro->ro_rt->rt_ifidx);
> > -           if (ifp != NULL) {
> > -                   ia6 = in6_ifawithscope(ifp, dst, rtableid);
> > -                   if_put(ifp);
> > +           ia6 = ifatoia6(rt_get_ifa(ro->ro_rt, rtableid));
> > +           if (ia6 == NULL) {
> > +                   ifp = if_get(ro->ro_rt->rt_ifidx);
> > +                   if (ifp != NULL) {
> > +                           ia6 = in6_ifawithscope(ifp, dst, rtableid);
> > +                           if_put(ifp);
> > +                   }
> >             }
> >             if (ia6 == NULL) /* xxx scope error ?*/
> >                     ia6 = ifatoia6(ro->ro_rt->rt_ifa);
                                       ^^^^^^^^^^^^^^^^^
Shouldn't rt_get_ifa() be used here as well?  Shouldn't the scope be
checked first?

I'd love to see this code unified with the logic in icmp6_reflect() bot
are different currently.

> > -   }
> > -
> > -   /*
> > -    * Use preferred source address if :
> > -    * - destination is not onlink
> > -    * - preferred source addresss is set
> > -    * - output interface is UP
> > -    */
> > -   if (ro->ro_rt && !(ro->ro_rt->rt_flags & RTF_LLINFO) &&
> > -       !(ro->ro_rt->rt_flags & RTF_HOST)) {
> > -           ip6_source = rtable_getsource(rtableid, AF_INET6);
> > -           if (ip6_source != NULL) {
> > -                   struct ifaddr *ifa;
> > -                   if ((ifa = ifa_ifwithaddr(ip6_source, rtableid)) !=
> > -                       NULL && ISSET(ifa->ifa_ifp->if_flags, IFF_UP)) {
> > -                           *in6src = &satosin6(ip6_source)->sin6_addr;
> > -                           return (0);
> > -                   }
> > -           }
> >     }
> >  
> >     if (ia6 == NULL)

Reply via email to