Martin Pieuchot(m...@openbsd.org) on 2015.10.25 16:14:27 +0100: > Diff below merges the guts of rtable_mpath_match() into rtable_lookup(). > As for the previous rtable_mpath_* diff this is a step towards MPATH by > default. > > This diff introduces a behavior change for RTM_GET. If multiple route > exists a gateway MUST be specified and the first one in the tree won't > be automagically selected by the kernel. > > Example: > > # route -n show -inet |grep 192.168.178/24 > 192.168.178/24 192.168.178.1 UGSP 1 230 - 8 > vio0 > 192.168.178/24 192.168.178.2 UGSP 0 0 - 8 > vio0
greping through 500k routes isnt really nice. > # route -n get 192.168.178/24 > route: writing to routing socket: No such process the error msg is misleading, something like "multiple routes available" or so would be nice. Otherwise i am lead to think there is no route. > # route -n get 192.168.178/24 192.168.178.1 > route to: 192.168.178.0 > destination: 192.168.178.0 > mask: 255.255.255.0 > gateway: 192.168.178.1 > interface: vio0 > if address: 192.168.178.5 > priority: 8 (static) > flags: <UP,GATEWAY,DONE,STATIC,MPATH> > use mtu expire > 326 0 0 > > > Is it acceptable? > > Index: net/if.c > =================================================================== > RCS file: /cvs/src/sys/net/if.c,v > retrieving revision 1.397 > diff -u -p -r1.397 if.c > --- net/if.c 25 Oct 2015 13:52:45 -0000 1.397 > +++ net/if.c 25 Oct 2015 15:06:44 -0000 > @@ -2360,7 +2360,7 @@ if_group_egress_build(void) > bzero(&sa_in, sizeof(sa_in)); > sa_in.sin_len = sizeof(sa_in); > sa_in.sin_family = AF_INET; > - rt0 = rtable_lookup(0, sintosa(&sa_in), sintosa(&sa_in)); > + rt0 = rtable_lookup(0, sintosa(&sa_in), sintosa(&sa_in), NULL, RTP_ANY); > if (rt0 != NULL) { > rt = rt0; > do { > @@ -2377,7 +2377,8 @@ if_group_egress_build(void) > > #ifdef INET6 > bcopy(&sa6_any, &sa_in6, sizeof(sa_in6)); > - rt0 = rtable_lookup(0, sin6tosa(&sa_in6), sin6tosa(&sa_in6)); > + rt0 = rtable_lookup(0, sin6tosa(&sa_in6), sin6tosa(&sa_in6), NULL, > + RTP_ANY); > if (rt0 != NULL) { > rt = rt0; > do { > Index: net/route.c > =================================================================== > RCS file: /cvs/src/sys/net/route.c,v > retrieving revision 1.263 > diff -u -p -r1.263 route.c > --- net/route.c 25 Oct 2015 14:48:51 -0000 1.263 > +++ net/route.c 25 Oct 2015 15:06:45 -0000 > @@ -733,25 +733,10 @@ rtrequest1(int req, struct rt_addrinfo * > switch (req) { > case RTM_DELETE: > rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], > - info->rti_info[RTAX_NETMASK]); > + info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], > + prio ); > if (rt == NULL) > return (ESRCH); > -#ifndef SMALL_KERNEL > - rt = rtable_mpath_match(tableid, rt, > - info->rti_info[RTAX_GATEWAY], prio); > - if (rt == NULL) > - return (ESRCH); > - > - /* > - * If we got multipath routes, we require users to specify > - * a matching gateway. > - */ > - if ((rt->rt_flags & RTF_MPATH) && > - info->rti_info[RTAX_GATEWAY] == NULL) { > - rtfree(rt); > - return (ESRCH); > - } > -#endif > > /* > * Since RTP_LOCAL cannot be set by userland, make > Index: net/rtable.c > =================================================================== > RCS file: /cvs/src/sys/net/rtable.c,v > retrieving revision 1.15 > diff -u -p -r1.15 rtable.c > --- net/rtable.c 25 Oct 2015 14:48:51 -0000 1.15 > +++ net/rtable.c 25 Oct 2015 15:06:45 -0000 > @@ -249,7 +249,7 @@ rtable_free(unsigned int rtableid) > > struct rtentry * > rtable_lookup(unsigned int rtableid, struct sockaddr *dst, > - struct sockaddr *mask) > + struct sockaddr *mask, struct sockaddr *gateway, uint8_t prio) > { > struct radix_node_head *rnh; > struct radix_node *rn; > @@ -264,6 +264,22 @@ rtable_lookup(unsigned int rtableid, str > return (NULL); > > rt = ((struct rtentry *)rn); > + > +#ifndef SMALL_KERNEL > + if (rnh->rnh_multipath) { > + rt = rt_mpath_matchgate(rt, gateway, prio); > + if (rt == NULL) > + return (NULL); > + } > + > + /* > + * If we got multipath routes, we require users to specify > + * a matching gateway. > + */ > + if (ISSET(rt->rt_flags, RTF_MPATH) && gateway == NULL) > + return (NULL); > +#endif /* !SMALL_KERNEL */ > + > rtref(rt); > > return (rt); > @@ -372,26 +388,6 @@ rtable_mpath_capable(unsigned int rtable > return (rnh->rnh_multipath); > } > > -struct rtentry * > -rtable_mpath_match(unsigned int rtableid, struct rtentry *rt0, > - struct sockaddr *gateway, uint8_t prio) > -{ > - struct radix_node_head *rnh; > - struct rtentry *rt; > - > - rnh = rtable_get(rtableid, rt_key(rt0)->sa_family); > - if (rnh == NULL || rnh->rnh_multipath == 0) > - return (rt0); > - > - rt = rt_mpath_matchgate(rt0, gateway, prio); > - > - if (rt != NULL) > - rtref(rt); > - rtfree(rt0); > - > - return (rt); > -} > - > /* Gateway selection by Hash-Threshold (RFC 2992) */ > struct rtentry * > rtable_mpath_select(struct rtentry *rt, uint32_t hash) > @@ -457,7 +453,7 @@ rtable_free(unsigned int rtableid) > > struct rtentry * > rtable_lookup(unsigned int rtableid, struct sockaddr *dst, > - struct sockaddr *mask) > + struct sockaddr *mask, struct sockaddr *gateway, uint8_t prio) > { > struct art_root *ar; > struct art_node *an; > @@ -489,7 +485,32 @@ rtable_lookup(unsigned int rtableid, str > if (an == NULL) > return (NULL); > > +#ifdef SMALL_KERNEL > rt = LIST_FIRST(&an->an_rtlist); > +#else > + LIST_FOREACH(rt, &an->an_rtlist, rt_next) { > + if (prio != RTP_ANY && > + (rt->rt_priority & RTP_MASK) != (prio & RTP_MASK)) > + continue; > + > + if (gateway == NULL) > + break; > + > + if (rt->rt_gateway->sa_len == gateway->sa_len && > + memcmp(rt->rt_gateway, gateway, gateway->sa_len) == 0) > + break; > + } > + if (rt == NULL) > + return (NULL); > + > + /* > + * If we got multipath routes, we require users to specify > + * a matching gateway. > + */ > + if (ISSET(rt->rt_flags, RTF_MPATH) && gateway == NULL) > + return (NULL); > +#endif /* SMALL_KERNEL */ > + > rtref(rt); > > return (rt); > @@ -745,33 +766,6 @@ int > rtable_mpath_capable(unsigned int rtableid, sa_family_t af) > { > return (1); > -} > - > -struct rtentry * > -rtable_mpath_match(unsigned int rtableid, struct rtentry *rt0, > - struct sockaddr *gateway, uint8_t prio) > -{ > - struct art_node *an = rt0->rt_node; > - struct rtentry *rt; > - > - LIST_FOREACH(rt, &an->an_rtlist, rt_next) { > - if (prio != RTP_ANY && > - (rt->rt_priority & RTP_MASK) != (prio & RTP_MASK)) > - continue; > - > - if (gateway == NULL) > - break; > - > - if (rt->rt_gateway->sa_len == gateway->sa_len && > - memcmp(rt->rt_gateway, gateway, gateway->sa_len) == 0) > - break; > - } > - > - if (rt != NULL) > - rtref(rt); > - rtfree(rt0); > - > - return (rt); > } > > /* Gateway selection by Hash-Threshold (RFC 2992) */ > Index: net/rtable.h > =================================================================== > RCS file: /cvs/src/sys/net/rtable.h,v > retrieving revision 1.8 > diff -u -p -r1.8 rtable.h > --- net/rtable.h 25 Oct 2015 14:48:51 -0000 1.8 > +++ net/rtable.h 25 Oct 2015 15:06:45 -0000 > @@ -54,7 +54,7 @@ unsigned int rtable_l2(unsigned int); > void rtable_l2set(unsigned int, unsigned int); > > struct rtentry *rtable_lookup(unsigned int, struct sockaddr *, > - struct sockaddr *); > + struct sockaddr *, struct sockaddr *, uint8_t); > struct rtentry *rtable_match(unsigned int, struct sockaddr *); > int rtable_insert(unsigned int, struct sockaddr *, > struct sockaddr *, struct sockaddr *, uint8_t, > Index: net/rtsock.c > =================================================================== > RCS file: /cvs/src/sys/net/rtsock.c,v > retrieving revision 1.178 > diff -u -p -r1.178 rtsock.c > --- net/rtsock.c 25 Oct 2015 11:58:11 -0000 1.178 > +++ net/rtsock.c 25 Oct 2015 15:06:45 -0000 > @@ -620,51 +620,27 @@ route_output(struct mbuf *m, ...) > error = EAFNOSUPPORT; > goto flush; > } > + > rt = rtable_lookup(tableid, info.rti_info[RTAX_DST], > - info.rti_info[RTAX_NETMASK]); > - if (rt == NULL) { > - error = ESRCH; > - goto flush; > - } > -#ifndef SMALL_KERNEL > - /* First find the right priority. */ > - rt = rtable_mpath_match(tableid, rt, NULL, prio); > + info.rti_info[RTAX_NETMASK], info.rti_info[RTAX_GATEWAY], > + prio); > if (rt == NULL) { > - error = ESRCH; > - goto flush; > - } > - > - > - /* > - * For RTM_CHANGE/LOCK, if we got multipath routes, > - * a matching RTAX_GATEWAY is required. > - * OR > - * If a gateway is specified then RTM_GET and > - * RTM_LOCK must match the gateway no matter > - * what even in the non multipath case. > - */ > - if ((rt->rt_flags & RTF_MPATH) || > - (info.rti_info[RTAX_GATEWAY] && rtm->rtm_type != > - RTM_CHANGE)) { > - rt = rtable_mpath_match(tableid, rt, > - info.rti_info[RTAX_GATEWAY], prio); > - if (rt == NULL) { > - error = ESRCH; > - goto flush; > - } > /* > - * Only RTM_GET may use an empty gateway > - * on multipath routes > + * If RTAX_GATEWAY is the argument we're trying to > + * change, try to find a compatible route. > */ > - if (info.rti_info[RTAX_GATEWAY] == NULL && > - rtm->rtm_type != RTM_GET) { > - rtfree(rt); > - rt = NULL; > + if (rtm->rtm_type == RTM_CHANGE && > + info.rti_info[RTAX_GATEWAY] != NULL) { > + rt = rtable_lookup(tableid, > + info.rti_info[RTAX_DST], > + info.rti_info[RTAX_NETMASK], NULL, prio); > + } > + > + if (rt == NULL) { > error = ESRCH; > goto flush; > } > } > -#endif > > /* > * RTM_CHANGE/LOCK need a perfect match, rn_lookup() > --