Re: [PATCH RFC net-next 5/7] net/ipv6: Add support for path selection using hash of 5-tuple
On 2/21/18 9:22 AM, Ido Schimmel wrote: >> +u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, >> + const struct sk_buff *skb) >> { >> struct flow_keys hash_keys; >> u32 mhash; >> >> -memset(_keys, 0, sizeof(hash_keys)); >> -hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; >> -if (skb) { >> -ip6_multipath_l3_keys(skb, _keys); >> -} else { >> -hash_keys.addrs.v6addrs.src = fl6->saddr; >> -hash_keys.addrs.v6addrs.dst = fl6->daddr; >> -hash_keys.tags.flow_label = (__force u32)fl6->flowlabel; >> -hash_keys.basic.ip_proto = fl6->flowi6_proto; >> +switch (net->ipv6.sysctl.multipath_hash_policy) { >> +case 0: >> +memset(_keys, 0, sizeof(hash_keys)); >> +hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; >> +if (skb) { >> +ip6_multipath_l3_keys(skb, _keys); >> +} else { >> +hash_keys.addrs.v6addrs.src = fl6->saddr; >> +hash_keys.addrs.v6addrs.dst = fl6->daddr; >> +hash_keys.tags.flow_label = (__force u32)fl6->flowlabel; >> +hash_keys.basic.ip_proto = fl6->flowi6_proto; >> +} >> +break; >> +case 1: >> +if (skb) { >> +unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; >> +struct flow_keys keys; >> + >> +/* short-circuit if we already have L4 hash present */ >> +if (skb->l4_hash) >> +return skb_get_hash_raw(skb) >> 1; >> + >> +memset(_keys, 0, sizeof(hash_keys)); >> + >> +skb_flow_dissect_flow_keys(skb, , flag); >> + >> +hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; >> +hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; > > Shouldn't you add: > > hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; > > ? > > Otherwise flow_hash_from_keys() will not consistentify the ports and the > addresses and it will also not use the addresses for the hash > computation. > > It's missing from fib_multipath_hash() as well, so I might be missing > something. Yes, I think you are correct. The oversight here is based on the missing set in the ipv4 variant. > >> +hash_keys.ports.src = keys.ports.src; >> +hash_keys.ports.dst = keys.ports.dst; >> +hash_keys.tags.flow_label = keys.tags.flow_label; > > Why are you using the flow label? will remove. It is supposed to be an L4 hash.
Re: [PATCH RFC net-next 5/7] net/ipv6: Add support for path selection using hash of 5-tuple
On Mon, Feb 12, 2018 at 04:06:00PM -0800, David Ahern wrote: > Some operators prefer IPv6 path selection to use a standard 5-tuple > hash rather than just an L3 hash with the flow the label. To that end > add support to IPv6 for multipath hash policy similar to bf4e0a3db97eb > ("net: ipv4: add support for ECMP hash policy choice"). The default > is still L3 which covers source and destination addresses along with > flow label and IPv6 protocol. > > Signed-off-by: David Ahern[...] > @@ -1819,20 +1820,53 @@ static void ip6_multipath_l3_keys(const struct > sk_buff *skb, > } > > /* if skb is set it will be used and fl6 can be NULL */ > -u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb) > +u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, > +const struct sk_buff *skb) > { > struct flow_keys hash_keys; > u32 mhash; > > - memset(_keys, 0, sizeof(hash_keys)); > - hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; > - if (skb) { > - ip6_multipath_l3_keys(skb, _keys); > - } else { > - hash_keys.addrs.v6addrs.src = fl6->saddr; > - hash_keys.addrs.v6addrs.dst = fl6->daddr; > - hash_keys.tags.flow_label = (__force u32)fl6->flowlabel; > - hash_keys.basic.ip_proto = fl6->flowi6_proto; > + switch (net->ipv6.sysctl.multipath_hash_policy) { > + case 0: > + memset(_keys, 0, sizeof(hash_keys)); > + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; > + if (skb) { > + ip6_multipath_l3_keys(skb, _keys); > + } else { > + hash_keys.addrs.v6addrs.src = fl6->saddr; > + hash_keys.addrs.v6addrs.dst = fl6->daddr; > + hash_keys.tags.flow_label = (__force u32)fl6->flowlabel; > + hash_keys.basic.ip_proto = fl6->flowi6_proto; > + } > + break; > + case 1: > + if (skb) { > + unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; > + struct flow_keys keys; > + > + /* short-circuit if we already have L4 hash present */ > + if (skb->l4_hash) > + return skb_get_hash_raw(skb) >> 1; > + > + memset(_keys, 0, sizeof(hash_keys)); > + > + skb_flow_dissect_flow_keys(skb, , flag); > + > + hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; > + hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; Shouldn't you add: hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; ? Otherwise flow_hash_from_keys() will not consistentify the ports and the addresses and it will also not use the addresses for the hash computation. It's missing from fib_multipath_hash() as well, so I might be missing something. > + hash_keys.ports.src = keys.ports.src; > + hash_keys.ports.dst = keys.ports.dst; > + hash_keys.tags.flow_label = keys.tags.flow_label; Why are you using the flow label? > + hash_keys.basic.ip_proto = keys.basic.ip_proto; > + } else { > + memset(_keys, 0, sizeof(hash_keys)); > + hash_keys.control.addr_type = > FLOW_DISSECTOR_KEY_IPV6_ADDRS; > + hash_keys.addrs.v6addrs.src = fl6->saddr; > + hash_keys.addrs.v6addrs.dst = fl6->daddr; > + hash_keys.ports.src = fl6->fl6_sport; > + hash_keys.ports.dst = fl6->fl6_dport; > + hash_keys.basic.ip_proto = fl6->flowi6_proto; > + } > } > mhash = flow_hash_from_keys(_keys); > > @@ -1858,7 +1892,7 @@ void ip6_route_input(struct sk_buff *skb) > if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX)) > fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id; > if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6)) > - fl6.mp_hash = rt6_multipath_hash(, skb); > + fl6.mp_hash = rt6_multipath_hash(net, , skb); > skb_dst_drop(skb); > skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, , flags)); > }
Re: [PATCH RFC net-next 5/7] net/ipv6: Add support for path selection using hash of 5-tuple
Le 13/02/2018 à 22:02, David Ahern a écrit : > On 2/13/18 1:59 PM, Nicolas Dichtel wrote: >> Le 13/02/2018 à 21:35, David Ahern a écrit : >>> On 2/13/18 1:31 PM, Nicolas Dichtel wrote: Le 13/02/2018 à 01:06, David Ahern a écrit : > Some operators prefer IPv6 path selection to use a standard 5-tuple > hash rather than just an L3 hash with the flow the label. To that end > add support to IPv6 for multipath hash policy similar to bf4e0a3db97eb > ("net: ipv4: add support for ECMP hash policy choice"). The default > is still L3 which covers source and destination addresses along with > flow label and IPv6 protocol. > > Signed-off-by: David Ahern> --- Do you plan to add the nl NETCONF support for this new sysctl? >>> >>> hmmm doesn't exist for the current IPv4 version, but in-kernel >>> drivers are notified. I wasn't planning on it, but it can be added. >>> >> Right for ipv4, it was on my todo list ;-) >> > > Awesome, then ipv6 is a 1-liner after you add ipv4 support. :-) > Sure :)
Re: [PATCH RFC net-next 5/7] net/ipv6: Add support for path selection using hash of 5-tuple
On 2/13/18 1:59 PM, Nicolas Dichtel wrote: > Le 13/02/2018 à 21:35, David Ahern a écrit : >> On 2/13/18 1:31 PM, Nicolas Dichtel wrote: >>> Le 13/02/2018 à 01:06, David Ahern a écrit : Some operators prefer IPv6 path selection to use a standard 5-tuple hash rather than just an L3 hash with the flow the label. To that end add support to IPv6 for multipath hash policy similar to bf4e0a3db97eb ("net: ipv4: add support for ECMP hash policy choice"). The default is still L3 which covers source and destination addresses along with flow label and IPv6 protocol. Signed-off-by: David Ahern--- >>> Do you plan to add the nl NETCONF support for this new sysctl? >>> >> >> hmmm doesn't exist for the current IPv4 version, but in-kernel >> drivers are notified. I wasn't planning on it, but it can be added. >> > Right for ipv4, it was on my todo list ;-) > Awesome, then ipv6 is a 1-liner after you add ipv4 support. :-)
Re: [PATCH RFC net-next 5/7] net/ipv6: Add support for path selection using hash of 5-tuple
Le 13/02/2018 à 21:35, David Ahern a écrit : > On 2/13/18 1:31 PM, Nicolas Dichtel wrote: >> Le 13/02/2018 à 01:06, David Ahern a écrit : >>> Some operators prefer IPv6 path selection to use a standard 5-tuple >>> hash rather than just an L3 hash with the flow the label. To that end >>> add support to IPv6 for multipath hash policy similar to bf4e0a3db97eb >>> ("net: ipv4: add support for ECMP hash policy choice"). The default >>> is still L3 which covers source and destination addresses along with >>> flow label and IPv6 protocol. >>> >>> Signed-off-by: David Ahern>>> --- >> Do you plan to add the nl NETCONF support for this new sysctl? >> > > hmmm doesn't exist for the current IPv4 version, but in-kernel > drivers are notified. I wasn't planning on it, but it can be added. > Right for ipv4, it was on my todo list ;-)
Re: [PATCH RFC net-next 5/7] net/ipv6: Add support for path selection using hash of 5-tuple
On 2/13/18 1:31 PM, Nicolas Dichtel wrote: > Le 13/02/2018 à 01:06, David Ahern a écrit : >> Some operators prefer IPv6 path selection to use a standard 5-tuple >> hash rather than just an L3 hash with the flow the label. To that end >> add support to IPv6 for multipath hash policy similar to bf4e0a3db97eb >> ("net: ipv4: add support for ECMP hash policy choice"). The default >> is still L3 which covers source and destination addresses along with >> flow label and IPv6 protocol. >> >> Signed-off-by: David Ahern>> --- > Do you plan to add the nl NETCONF support for this new sysctl? > hmmm doesn't exist for the current IPv4 version, but in-kernel drivers are notified. I wasn't planning on it, but it can be added.
Re: [PATCH RFC net-next 5/7] net/ipv6: Add support for path selection using hash of 5-tuple
Le 13/02/2018 à 01:06, David Ahern a écrit : > Some operators prefer IPv6 path selection to use a standard 5-tuple > hash rather than just an L3 hash with the flow the label. To that end > add support to IPv6 for multipath hash policy similar to bf4e0a3db97eb > ("net: ipv4: add support for ECMP hash policy choice"). The default > is still L3 which covers source and destination addresses along with > flow label and IPv6 protocol. > > Signed-off-by: David Ahern> --- Do you plan to add the nl NETCONF support for this new sysctl? Regards, Nicolas
[PATCH RFC net-next 5/7] net/ipv6: Add support for path selection using hash of 5-tuple
Some operators prefer IPv6 path selection to use a standard 5-tuple hash rather than just an L3 hash with the flow the label. To that end add support to IPv6 for multipath hash policy similar to bf4e0a3db97eb ("net: ipv4: add support for ECMP hash policy choice"). The default is still L3 which covers source and destination addresses along with flow label and IPv6 protocol. Signed-off-by: David Ahern--- Documentation/networking/ip-sysctl.txt | 7 include/net/ip6_route.h| 3 +- include/net/netevent.h | 1 + include/net/netns/ipv6.h | 1 + net/ipv6/icmp.c| 2 +- net/ipv6/route.c | 64 ++ net/ipv6/sysctl_net_ipv6.c | 26 ++ 7 files changed, 87 insertions(+), 17 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index a553d4e4a0fb..783675a730e5 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1363,6 +1363,13 @@ flowlabel_reflect - BOOLEAN FALSE: disabled Default: FALSE +fib_multipath_hash_policy - INTEGER + Controls which hash policy to use for multipath routes. + Default: 0 (Layer 3) + Possible values: + 0 - Layer 3 (source and destination addresses plus flow label) + 1 - Layer 4 (standard 5-tuple) + anycast_src_echo_reply - BOOLEAN Controls the use of anycast addresses as source addresses for ICMPv6 echo reply diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 27d23a65f3cd..f13657a62e5c 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -127,7 +127,8 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr, int oif, int flags); -u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb); +u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, + const struct sk_buff *skb); struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6); diff --git a/include/net/netevent.h b/include/net/netevent.h index baee605a94ab..d9918261701c 100644 --- a/include/net/netevent.h +++ b/include/net/netevent.h @@ -27,6 +27,7 @@ enum netevent_notif_type { NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */ NETEVENT_IPV4_MPATH_HASH_UPDATE, /* arg is struct net ptr */ + NETEVENT_IPV6_MPATH_HASH_UPDATE, /* arg is struct net ptr */ }; int register_netevent_notifier(struct notifier_block *nb); diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 987cc4569cb8..6b0de3b71bbf 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -28,6 +28,7 @@ struct netns_sysctl_ipv6 { int ip6_rt_gc_elasticity; int ip6_rt_mtu_expires; int ip6_rt_min_advmss; + int multipath_hash_policy; int flowlabel_consistency; int auto_flowlabels; int icmpv6_time; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 6ae5dd3f4d0d..e255c9dd9b57 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -522,7 +522,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, fl6.fl6_icmp_type = type; fl6.fl6_icmp_code = code; fl6.flowi6_uid = sock_net_uid(net, NULL); - fl6.mp_hash = rt6_multipath_hash(, skb); + fl6.mp_hash = rt6_multipath_hash(net, , skb); security_skb_classify_flow(skb, flowi6_to_flowi()); sk = icmpv6_xmit_lock(net); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8d3a95eed662..1b559f78a896 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -450,7 +450,8 @@ static bool rt6_check_expired(const struct rt6_info *rt) return false; } -static struct rt6_info *rt6_multipath_select(struct rt6_info *match, +static struct rt6_info *rt6_multipath_select(const struct net *net, +struct rt6_info *match, struct flowi6 *fl6, int oif, int strict) { @@ -460,7 +461,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match, * case it will always be non-zero. Otherwise now is the time to do it. */ if (!fl6->mp_hash) - fl6->mp_hash = rt6_multipath_hash(fl6, NULL); + fl6->mp_hash = rt6_multipath_hash(net, fl6, NULL); if (fl6->mp_hash <= atomic_read(>rt6i_nh_upper_bound)) return match; @@ -929,7 +930,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net, rt = rt6_device_match(net, rt, >saddr,