On 12/02/15(Thu) 12:35, Martin Pieuchot wrote: > On 10/02/15(Tue) 03:04, Claudio Jeker wrote: > > There is no need to forbid configuring the same network more than > > once. Instead just rely on the multipath and priority handling of the > > routing table to select the right route. > > Additionally this removes cloned routes (arp/ndp cache) when the interface > > goes down or when any of the multipath cloning routes is changed. > > > > With this it is possible to run 2 dhclients on wired and wireless with a > > bridged network. Active TCP sessions still fail when the cable is > > unplugged. To fix this, more work is needed. > > > > This changes a fundamental part of the network stack and therefore broad > > testing is needed to find all the hidden dragons. > > Here's a version of the diff rebased on top of the recent changes.
I think it's the time to get this in, then as a second step put the dhclient(8) bits. Claudio you have my ok. > Index: net/if_var.h > =================================================================== > RCS file: /cvs/src/sys/net/if_var.h,v > retrieving revision 1.20 > diff -u -p -r1.20 if_var.h > --- net/if_var.h 9 Feb 2015 03:09:57 -0000 1.20 > +++ net/if_var.h 12 Feb 2015 11:08:40 -0000 > @@ -392,6 +392,7 @@ do { > \ > /* default interface priorities */ > #define IF_WIRED_DEFAULT_PRIORITY 0 > #define IF_WIRELESS_DEFAULT_PRIORITY 4 > +#define IF_CARP_DEFAULT_PRIORITY 15 > > extern struct ifnet_head ifnet; > extern struct ifnet *lo0ifp; > Index: net/route.c > =================================================================== > RCS file: /cvs/src/sys/net/route.c,v > retrieving revision 1.206 > diff -u -p -r1.206 route.c > --- net/route.c 11 Feb 2015 23:34:43 -0000 1.206 > +++ net/route.c 12 Feb 2015 11:08:40 -0000 > @@ -554,6 +554,16 @@ rtdeletemsg(struct rtentry *rt, u_int ta > return (error); > } > > +static inline int > +rtequal(struct rtentry *a, struct rtentry *b) > +{ > + if (memcmp(rt_key(a), rt_key(b), rt_key(a)->sa_len) == 0 && > + memcmp(rt_mask(a), rt_mask(b), rt_mask(a)->sa_len) == 0) > + return 1; > + else > + return 0; > +} > + > int > rtflushclone1(struct radix_node *rn, void *arg, u_int id) > { > @@ -561,7 +571,8 @@ rtflushclone1(struct radix_node *rn, voi > > rt = (struct rtentry *)rn; > parent = (struct rtentry *)arg; > - if ((rt->rt_flags & RTF_CLONED) != 0 && rt->rt_parent == parent) > + if ((rt->rt_flags & RTF_CLONED) != 0 && (rt->rt_parent == parent || > + rtequal(rt->rt_parent, parent))) > rtdeletemsg(rt, id); > return 0; > } > @@ -1106,16 +1117,20 @@ rt_ifa_add(struct ifaddr *ifa, int flags > { > struct rtentry *rt, *nrt = NULL; > struct sockaddr_rtlabel sa_rl; > + struct sockaddr_dl sa_dl = { sizeof(sa_dl), AF_LINK }; > struct rt_addrinfo info; > u_short rtableid = ifa->ifa_ifp->if_rdomain; > - u_int8_t prio = RTP_CONNECTED; > + u_int8_t 
prio = ifa->ifa_ifp->if_priority + RTP_STATIC; > int error; > > + sa_dl.sdl_type = ifa->ifa_ifp->if_type; > + sa_dl.sdl_index = ifa->ifa_ifp->if_index; > + > memset(&info, 0, sizeof(info)); > info.rti_ifa = ifa; > - info.rti_flags = flags; > + info.rti_flags = flags | RTF_MPATH; > info.rti_info[RTAX_DST] = dst; > - info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; > + info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&sa_dl; > info.rti_info[RTAX_LABEL] = > rtlabel_id2sa(ifa->ifa_ifp->if_rtlabelid, &sa_rl); > > @@ -1161,8 +1176,9 @@ rt_ifa_del(struct ifaddr *ifa, int flags > struct sockaddr *deldst; > struct rt_addrinfo info; > struct sockaddr_rtlabel sa_rl; > + struct sockaddr_dl sa_dl = { sizeof(sa_dl), AF_LINK }; > u_short rtableid = ifa->ifa_ifp->if_rdomain; > - u_int8_t prio = RTP_CONNECTED; > + u_int8_t prio = ifa->ifa_ifp->if_priority + RTP_STATIC; > int error; > > if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) { > @@ -1187,10 +1203,14 @@ rt_ifa_del(struct ifaddr *ifa, int flags > } > } > > + sa_dl.sdl_type = ifa->ifa_ifp->if_type; > + sa_dl.sdl_index = ifa->ifa_ifp->if_index; > + > memset(&info, 0, sizeof(info)); > info.rti_ifa = ifa; > info.rti_flags = flags; > info.rti_info[RTAX_DST] = dst; > + info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&sa_dl; > info.rti_info[RTAX_LABEL] = > rtlabel_id2sa(ifa->ifa_ifp->if_rtlabelid, &sa_rl); > > @@ -1692,6 +1715,15 @@ rt_if_linkstate_change(struct radix_node > } > } else { > if (rt->rt_flags & RTF_UP) { > + /* > + * Remove cloned routes (mainly arp) to > + * down interfaces so we have a chance to > + * clone a new route from a better source. 
> + */ > + if (rt->rt_flags & RTF_CLONED) { > + rtdeletemsg(rt, id); > + return (0); > + } > /* take route down */ > rt->rt_flags &= ~RTF_UP; > rn_mpath_reprio(rn, rt->rt_priority | RTP_DOWN); > Index: netinet/if_ether.c > =================================================================== > RCS file: /cvs/src/sys/netinet/if_ether.c,v > retrieving revision 1.146 > diff -u -p -r1.146 if_ether.c > --- netinet/if_ether.c 11 Feb 2015 23:34:43 -0000 1.146 > +++ netinet/if_ether.c 12 Feb 2015 11:10:11 -0000 > @@ -111,8 +111,6 @@ void db_print_llinfo(caddr_t); > int db_show_radix_node(struct radix_node *, void *, u_int); > #endif > > -static const struct sockaddr_dl null_sdl = { sizeof(null_sdl), AF_LINK }; > - > /* > * Timeout routine. Age arp_tab entries periodically. > */ > @@ -181,14 +179,6 @@ arp_rtrequest(int req, struct rtentry *r > if (rt->rt_flags & RTF_CLONING || > ((rt->rt_flags & (RTF_LLINFO | RTF_LOCAL)) && !la)) { > /* > - * Case 1: This route should come from a route to iface. > - */ > - rt_setgate(rt, (struct sockaddr *)&null_sdl, > - ifp->if_rdomain); > - gate = rt->rt_gateway; > - SDL(gate)->sdl_type = ifp->if_type; > - SDL(gate)->sdl_index = ifp->if_index; > - /* > * Give this route an expiration time, even though > * it's a "permanent" route, so that routes cloned > * from it do not need their expiration time set. 
> @@ -251,10 +241,6 @@ arp_rtrequest(int req, struct rtentry *r > } > if (ifa) { > rt->rt_expire = 0; > - SDL(gate)->sdl_alen = ETHER_ADDR_LEN; > - memcpy(LLADDR(SDL(gate)), > - ((struct arpcom *)ifp)->ac_enaddr, ETHER_ADDR_LEN); > - > /* > * XXX Since lo0 is in the default rdomain we > * should not (ab)use it for any route related > Index: netinet/in.c > =================================================================== > RCS file: /cvs/src/sys/netinet/in.c,v > retrieving revision 1.115 > diff -u -p -r1.115 in.c > --- netinet/in.c 12 Jan 2015 13:51:45 -0000 1.115 > +++ netinet/in.c 12 Feb 2015 11:08:40 -0000 > @@ -93,8 +93,6 @@ int in_lifaddr_ioctl(struct socket *, u_ > struct ifnet *); > > void in_purgeaddr(struct ifaddr *); > -int in_addprefix(struct in_ifaddr *); > -int in_scrubprefix(struct in_ifaddr *); > int in_addhost(struct in_ifaddr *, struct sockaddr_in *); > int in_scrubhost(struct in_ifaddr *, struct sockaddr_in *); > int in_insert_prefix(struct in_ifaddr *); > @@ -590,7 +588,8 @@ in_ifscrub(struct ifnet *ifp, struct in_ > if (ISSET(ifp->if_flags, IFF_POINTOPOINT)) > in_scrubhost(ia, &ia->ia_dstaddr); > else if (!ISSET(ifp->if_flags, IFF_LOOPBACK)) > - in_scrubprefix(ia); > + if (ia->ia_flags & IFA_ROUTE) > + in_remove_prefix(ia); > } > > /* > @@ -669,7 +668,7 @@ in_ifinit(struct ifnet *ifp, struct in_i > goto out; > error = in_addhost(ia, &ia->ia_dstaddr); > } else if (!ISSET(ifp->if_flags, IFF_LOOPBACK)) { > - error = in_addprefix(ia); > + error = in_insert_prefix(ia); > } > > /* > @@ -759,125 +758,6 @@ in_remove_prefix(struct in_ifaddr *ia) > ifa->ifa_broadaddr); > > ia->ia_flags &= ~IFA_ROUTE; > -} > - > -/* > - * add a route to prefix ("connected route" in cisco terminology). > - * does nothing if there's some interface address with the same prefix > already. 
> - */ > -int > -in_addprefix(struct in_ifaddr *ia0) > -{ > - struct ifnet *ifp; > - struct ifaddr *ifa; > - struct in_ifaddr *ia; > - struct in_addr prefix, mask, p, m; > - > - prefix = ia0->ia_addr.sin_addr; > - mask = ia0->ia_sockmask.sin_addr; > - prefix.s_addr &= mask.s_addr; > - > - TAILQ_FOREACH(ifp, &ifnet, if_list) { > - if (ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)) > - continue; > - > - if (ifp->if_rdomain != ia0->ia_ifp->if_rdomain) > - continue; > - > - TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { > - if (ifa->ifa_addr->sa_family != AF_INET) > - continue; > - > - ia = ifatoia(ifa); > - > - if ((ia->ia_flags & IFA_ROUTE) == 0) > - continue; > - > - p = ia->ia_addr.sin_addr; > - m = ia->ia_sockmask.sin_addr; > - p.s_addr &= m.s_addr; > - > - if (prefix.s_addr != p.s_addr || > - mask.s_addr != m.s_addr) > - continue; > - > -#if NCARP > 0 > - /* move to a real interface instead of carp interface */ > - if (ia->ia_ifp->if_type == IFT_CARP && > - ia0->ia_ifp->if_type != IFT_CARP) { > - in_remove_prefix(ia); > - break; > - } > -#endif > - /* > - * If we got a matching prefix route inserted by other > - * interface address, we don't need to bother > - */ > - return (0); > - } > - } > - > - /* > - * noone seem to have prefix route. insert it. > - */ > - return in_insert_prefix(ia0); > -} > - > -/* > - * remove a route to prefix ("connected route" in cisco terminology). > - * re-installs the route by using another interface address, if there's one > - * with the same prefix (otherwise we lose the route mistakenly). 
> - */ > -int > -in_scrubprefix(struct in_ifaddr *ia0) > -{ > - struct ifnet *ifp; > - struct ifaddr *ifa; > - struct in_ifaddr *ia; > - struct in_addr prefix, mask, p, m; > - > - if ((ia0->ia_flags & IFA_ROUTE) == 0) > - return 0; > - > - prefix = ia0->ia_addr.sin_addr; > - mask = ia0->ia_sockmask.sin_addr; > - prefix.s_addr &= mask.s_addr; > - > - TAILQ_FOREACH(ifp, &ifnet, if_list) { > - if (ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)) > - continue; > - > - if (ifp->if_rdomain != ia0->ia_ifp->if_rdomain) > - continue; > - > - TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { > - if (ifa->ifa_addr->sa_family != AF_INET) > - continue; > - > - ia = ifatoia(ifa); > - > - if ((ia->ia_flags & IFA_ROUTE) != 0) > - continue; > - > - p = ia->ia_addr.sin_addr; > - m = ia->ia_sockmask.sin_addr; > - p.s_addr &= m.s_addr; > - > - if (prefix.s_addr != p.s_addr || > - mask.s_addr != m.s_addr) > - continue; > - > - /* Move IFA_ROUTE to the matching prefix route. */ > - in_remove_prefix(ia0); > - return (in_insert_prefix(ia)); > - } > - } > - > - /* > - * noone seem to have prefix route. remove it. 
> - */ > - in_remove_prefix(ia0); > - return 0; > } > > /* > Index: netinet/ip_carp.c > =================================================================== > RCS file: /cvs/src/sys/netinet/ip_carp.c,v > retrieving revision 1.246 > diff -u -p -r1.246 ip_carp.c > --- netinet/ip_carp.c 11 Feb 2015 04:29:29 -0000 1.246 > +++ netinet/ip_carp.c 12 Feb 2015 11:15:40 -0000 > @@ -751,6 +751,7 @@ carp_clone_create(ifc, unit) > ifp->if_addrlen = ETHER_ADDR_LEN; > ifp->if_hdrlen = ETHER_HDR_LEN; > ifp->if_mtu = ETHERMTU; > + ifp->if_priority = IF_CARP_DEFAULT_PRIORITY; > IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); > IFQ_SET_READY(&ifp->if_snd); > if_attach(ifp); > Index: netinet6/nd6.c > =================================================================== > RCS file: /cvs/src/sys/netinet6/nd6.c,v > retrieving revision 1.131 > diff -u -p -r1.131 nd6.c > --- netinet6/nd6.c 11 Feb 2015 23:34:43 -0000 1.131 > +++ netinet6/nd6.c 12 Feb 2015 11:10:40 -0000 > @@ -957,7 +957,6 @@ nd6_rtrequest(int req, struct rtentry *r > { > struct sockaddr *gate = rt->rt_gateway; > struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo; > - static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; > struct ifnet *ifp = rt->rt_ifp; > struct ifaddr *ifa; > struct nd_defrouter *dr; > @@ -1016,17 +1015,6 @@ nd6_rtrequest(int req, struct rtentry *r > */ > if ((rt->rt_flags & RTF_CLONING) || > ((rt->rt_flags & (RTF_LLINFO | RTF_LOCAL)) && !ln)) { > - /* > - * Case 1: This route should come from a route to > - * interface (RTF_CLONING case) or the route should be > - * treated as on-link but is currently not > - * (RTF_LLINFO && !ln case). 
> - */ > - rt_setgate(rt, (struct sockaddr *)&null_sdl, > - ifp->if_rdomain); > - gate = rt->rt_gateway; > - SDL(gate)->sdl_type = ifp->if_type; > - SDL(gate)->sdl_index = ifp->if_index; > if (ln) > nd6_llinfo_settimer(ln, 0); > if ((rt->rt_flags & RTF_CLONING) != 0) > @@ -1062,7 +1050,7 @@ nd6_rtrequest(int req, struct rtentry *r > /* FALLTHROUGH */ > case RTM_RESOLVE: > if (gate->sa_family != AF_LINK || > - gate->sa_len < sizeof(null_sdl)) { > + gate->sa_len < sizeof(struct sockaddr_dl)) { > log(LOG_DEBUG, "%s: bad gateway value: %s\n", > __func__, ifp->if_xname); > break; > @@ -1144,14 +1132,9 @@ nd6_rtrequest(int req, struct rtentry *r > ifa = &in6ifa_ifpwithaddr(ifp, > &satosin6(rt_key(rt))->sin6_addr)->ia_ifa; > if (ifa) { > - caddr_t macp = nd6_ifptomac(ifp); > nd6_llinfo_settimer(ln, -1); > ln->ln_state = ND6_LLINFO_REACHABLE; > ln->ln_byhint = 0; > - if (macp) { > - memcpy(LLADDR(SDL(gate)), macp, > ifp->if_addrlen); > - SDL(gate)->sdl_alen = ifp->if_addrlen; > - } > > /* > * XXX Since lo0 is in the default rdomain we >