Re: [PATCH RFC net-next 5/7] net/ipv6: Add support for path selection using hash of 5-tuple

2018-02-21 Thread David Ahern
On 2/21/18 9:22 AM, Ido Schimmel wrote:

>> +u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
>> +   const struct sk_buff *skb)
>>  {
>>  struct flow_keys hash_keys;
>>  u32 mhash;
>>  
>> -memset(_keys, 0, sizeof(hash_keys));
>> -hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
>> -if (skb) {
>> -ip6_multipath_l3_keys(skb, _keys);
>> -} else {
>> -hash_keys.addrs.v6addrs.src = fl6->saddr;
>> -hash_keys.addrs.v6addrs.dst = fl6->daddr;
>> -hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
>> -hash_keys.basic.ip_proto = fl6->flowi6_proto;
>> +switch (net->ipv6.sysctl.multipath_hash_policy) {
>> +case 0:
>> +memset(_keys, 0, sizeof(hash_keys));
>> +hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
>> +if (skb) {
>> +ip6_multipath_l3_keys(skb, _keys);
>> +} else {
>> +hash_keys.addrs.v6addrs.src = fl6->saddr;
>> +hash_keys.addrs.v6addrs.dst = fl6->daddr;
>> +hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
>> +hash_keys.basic.ip_proto = fl6->flowi6_proto;
>> +}
>> +break;
>> +case 1:
>> +if (skb) {
>> +unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
>> +struct flow_keys keys;
>> +
>> +/* short-circuit if we already have L4 hash present */
>> +if (skb->l4_hash)
>> +return skb_get_hash_raw(skb) >> 1;
>> +
>> +memset(_keys, 0, sizeof(hash_keys));
>> +
>> +skb_flow_dissect_flow_keys(skb, , flag);
>> +
>> +hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
>> +hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst;
> 
> Shouldn't you add:
> 
> hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
> 
> ?
> 
> Otherwise flow_hash_from_keys() will not consistentify the ports and the
> addresses and it will also not use the addresses for the hash
> computation.
> 
> It's missing from fib_multipath_hash() as well, so I might be missing
> something.

Yes, I think you are correct. The oversight here is based on the missing
set in the ipv4 variant.

> 
>> +hash_keys.ports.src = keys.ports.src;
>> +hash_keys.ports.dst = keys.ports.dst;
>> +hash_keys.tags.flow_label = keys.tags.flow_label;
> 
> Why are you using the flow label?

will remove. It is supposed to be an L4 hash.


Re: [PATCH RFC net-next 5/7] net/ipv6: Add support for path selection using hash of 5-tuple

2018-02-21 Thread Ido Schimmel
On Mon, Feb 12, 2018 at 04:06:00PM -0800, David Ahern wrote:
> Some operators prefer IPv6 path selection to use a standard 5-tuple
> hash rather than just an L3 hash with the flow the label. To that end
> add support to IPv6 for multipath hash policy similar to bf4e0a3db97eb
> ("net: ipv4: add support for ECMP hash policy choice"). The default
> is still L3 which covers source and destination addresses along with
> flow label and IPv6 protocol.
> 
> Signed-off-by: David Ahern 

[...]

> @@ -1819,20 +1820,53 @@ static void ip6_multipath_l3_keys(const struct 
> sk_buff *skb,
>  }
>  
>  /* if skb is set it will be used and fl6 can be NULL */
> -u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
> +u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
> +const struct sk_buff *skb)
>  {
>   struct flow_keys hash_keys;
>   u32 mhash;
>  
> - memset(_keys, 0, sizeof(hash_keys));
> - hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
> - if (skb) {
> - ip6_multipath_l3_keys(skb, _keys);
> - } else {
> - hash_keys.addrs.v6addrs.src = fl6->saddr;
> - hash_keys.addrs.v6addrs.dst = fl6->daddr;
> - hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
> - hash_keys.basic.ip_proto = fl6->flowi6_proto;
> + switch (net->ipv6.sysctl.multipath_hash_policy) {
> + case 0:
> + memset(_keys, 0, sizeof(hash_keys));
> + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
> + if (skb) {
> + ip6_multipath_l3_keys(skb, _keys);
> + } else {
> + hash_keys.addrs.v6addrs.src = fl6->saddr;
> + hash_keys.addrs.v6addrs.dst = fl6->daddr;
> + hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
> + hash_keys.basic.ip_proto = fl6->flowi6_proto;
> + }
> + break;
> + case 1:
> + if (skb) {
> + unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
> + struct flow_keys keys;
> +
> + /* short-circuit if we already have L4 hash present */
> + if (skb->l4_hash)
> + return skb_get_hash_raw(skb) >> 1;
> +
> + memset(_keys, 0, sizeof(hash_keys));
> +
> + skb_flow_dissect_flow_keys(skb, , flag);
> +
> + hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
> + hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst;

Shouldn't you add:

hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;

?

Otherwise flow_hash_from_keys() will not consistentify the ports and the
addresses and it will also not use the addresses for the hash
computation.

It's missing from fib_multipath_hash() as well, so I might be missing
something.

> + hash_keys.ports.src = keys.ports.src;
> + hash_keys.ports.dst = keys.ports.dst;
> + hash_keys.tags.flow_label = keys.tags.flow_label;

Why are you using the flow label?

> + hash_keys.basic.ip_proto = keys.basic.ip_proto;
> + } else {
> + memset(_keys, 0, sizeof(hash_keys));
> + hash_keys.control.addr_type = 
> FLOW_DISSECTOR_KEY_IPV6_ADDRS;
> + hash_keys.addrs.v6addrs.src = fl6->saddr;
> + hash_keys.addrs.v6addrs.dst = fl6->daddr;
> + hash_keys.ports.src = fl6->fl6_sport;
> + hash_keys.ports.dst = fl6->fl6_dport;
> + hash_keys.basic.ip_proto = fl6->flowi6_proto;
> + }
>   }
>   mhash = flow_hash_from_keys(_keys);
>  
> @@ -1858,7 +1892,7 @@ void ip6_route_input(struct sk_buff *skb)
>   if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
>   fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
>   if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
> - fl6.mp_hash = rt6_multipath_hash(, skb);
> + fl6.mp_hash = rt6_multipath_hash(net, , skb);
>   skb_dst_drop(skb);
>   skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, , flags));
>  }


Re: [PATCH RFC net-next 5/7] net/ipv6: Add support for path selection using hash of 5-tuple

2018-02-13 Thread Nicolas Dichtel
Le 13/02/2018 à 22:02, David Ahern a écrit :
> On 2/13/18 1:59 PM, Nicolas Dichtel wrote:
>> Le 13/02/2018 à 21:35, David Ahern a écrit :
>>> On 2/13/18 1:31 PM, Nicolas Dichtel wrote:
 Le 13/02/2018 à 01:06, David Ahern a écrit :
> Some operators prefer IPv6 path selection to use a standard 5-tuple
> hash rather than just an L3 hash with the flow the label. To that end
> add support to IPv6 for multipath hash policy similar to bf4e0a3db97eb
> ("net: ipv4: add support for ECMP hash policy choice"). The default
> is still L3 which covers source and destination addresses along with
> flow label and IPv6 protocol.
>
> Signed-off-by: David Ahern 
> ---
 Do you plan to add the nl NETCONF support for this new sysctl?

>>>
>>> hmmm doesn't exist for the current IPv4 version, but in-kernel
>>> drivers are notified. I wasn't planning on it, but it can be added.
>>>
>> Right for ipv4, it was on my todo list ;-)
>>
> 
> Awesome, then ipv6 is a 1-liner after you add ipv4 support. :-)
> 
Sure :)


Re: [PATCH RFC net-next 5/7] net/ipv6: Add support for path selection using hash of 5-tuple

2018-02-13 Thread David Ahern
On 2/13/18 1:59 PM, Nicolas Dichtel wrote:
> Le 13/02/2018 à 21:35, David Ahern a écrit :
>> On 2/13/18 1:31 PM, Nicolas Dichtel wrote:
>>> Le 13/02/2018 à 01:06, David Ahern a écrit :
 Some operators prefer IPv6 path selection to use a standard 5-tuple
 hash rather than just an L3 hash with the flow the label. To that end
 add support to IPv6 for multipath hash policy similar to bf4e0a3db97eb
 ("net: ipv4: add support for ECMP hash policy choice"). The default
 is still L3 which covers source and destination addresses along with
 flow label and IPv6 protocol.

 Signed-off-by: David Ahern 
 ---
>>> Do you plan to add the nl NETCONF support for this new sysctl?
>>>
>>
>> hmmm doesn't exist for the current IPv4 version, but in-kernel
>> drivers are notified. I wasn't planning on it, but it can be added.
>>
> Right for ipv4, it was on my todo list ;-)
> 

Awesome, then ipv6 is a 1-liner after you add ipv4 support. :-)


Re: [PATCH RFC net-next 5/7] net/ipv6: Add support for path selection using hash of 5-tuple

2018-02-13 Thread Nicolas Dichtel
Le 13/02/2018 à 21:35, David Ahern a écrit :
> On 2/13/18 1:31 PM, Nicolas Dichtel wrote:
>> Le 13/02/2018 à 01:06, David Ahern a écrit :
>>> Some operators prefer IPv6 path selection to use a standard 5-tuple
>>> hash rather than just an L3 hash with the flow the label. To that end
>>> add support to IPv6 for multipath hash policy similar to bf4e0a3db97eb
>>> ("net: ipv4: add support for ECMP hash policy choice"). The default
>>> is still L3 which covers source and destination addresses along with
>>> flow label and IPv6 protocol.
>>>
>>> Signed-off-by: David Ahern 
>>> ---
>> Do you plan to add the nl NETCONF support for this new sysctl?
>>
> 
> hmmm doesn't exist for the current IPv4 version, but in-kernel
> drivers are notified. I wasn't planning on it, but it can be added.
> 
Right for ipv4, it was on my todo list ;-)


Re: [PATCH RFC net-next 5/7] net/ipv6: Add support for path selection using hash of 5-tuple

2018-02-13 Thread David Ahern
On 2/13/18 1:31 PM, Nicolas Dichtel wrote:
> Le 13/02/2018 à 01:06, David Ahern a écrit :
>> Some operators prefer IPv6 path selection to use a standard 5-tuple
>> hash rather than just an L3 hash with the flow the label. To that end
>> add support to IPv6 for multipath hash policy similar to bf4e0a3db97eb
>> ("net: ipv4: add support for ECMP hash policy choice"). The default
>> is still L3 which covers source and destination addresses along with
>> flow label and IPv6 protocol.
>>
>> Signed-off-by: David Ahern 
>> ---
> Do you plan to add the nl NETCONF support for this new sysctl?
> 

hmmm doesn't exist for the current IPv4 version, but in-kernel
drivers are notified. I wasn't planning on it, but it can be added.


Re: [PATCH RFC net-next 5/7] net/ipv6: Add support for path selection using hash of 5-tuple

2018-02-13 Thread Nicolas Dichtel
Le 13/02/2018 à 01:06, David Ahern a écrit :
> Some operators prefer IPv6 path selection to use a standard 5-tuple
> hash rather than just an L3 hash with the flow the label. To that end
> add support to IPv6 for multipath hash policy similar to bf4e0a3db97eb
> ("net: ipv4: add support for ECMP hash policy choice"). The default
> is still L3 which covers source and destination addresses along with
> flow label and IPv6 protocol.
> 
> Signed-off-by: David Ahern 
> ---
Do you plan to add the nl NETCONF support for this new sysctl?


Regards,
Nicolas


[PATCH RFC net-next 5/7] net/ipv6: Add support for path selection using hash of 5-tuple

2018-02-12 Thread David Ahern
Some operators prefer IPv6 path selection to use a standard 5-tuple
hash rather than just an L3 hash with the flow the label. To that end
add support to IPv6 for multipath hash policy similar to bf4e0a3db97eb
("net: ipv4: add support for ECMP hash policy choice"). The default
is still L3 which covers source and destination addresses along with
flow label and IPv6 protocol.

Signed-off-by: David Ahern 
---
 Documentation/networking/ip-sysctl.txt |  7 
 include/net/ip6_route.h|  3 +-
 include/net/netevent.h |  1 +
 include/net/netns/ipv6.h   |  1 +
 net/ipv6/icmp.c|  2 +-
 net/ipv6/route.c   | 64 ++
 net/ipv6/sysctl_net_ipv6.c | 26 ++
 7 files changed, 87 insertions(+), 17 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt 
b/Documentation/networking/ip-sysctl.txt
index a553d4e4a0fb..783675a730e5 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1363,6 +1363,13 @@ flowlabel_reflect - BOOLEAN
FALSE: disabled
Default: FALSE
 
+fib_multipath_hash_policy - INTEGER
+   Controls which hash policy to use for multipath routes.
+   Default: 0 (Layer 3)
+   Possible values:
+   0 - Layer 3 (source and destination addresses plus flow label)
+   1 - Layer 4 (standard 5-tuple)
+
 anycast_src_echo_reply - BOOLEAN
Controls the use of anycast addresses as source addresses for ICMPv6
echo reply
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 27d23a65f3cd..f13657a62e5c 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -127,7 +127,8 @@ static inline int ip6_route_get_saddr(struct net *net, 
struct rt6_info *rt,
 
 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
const struct in6_addr *saddr, int oif, int flags);
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb);
+u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
+  const struct sk_buff *skb);
 
 struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
 
diff --git a/include/net/netevent.h b/include/net/netevent.h
index baee605a94ab..d9918261701c 100644
--- a/include/net/netevent.h
+++ b/include/net/netevent.h
@@ -27,6 +27,7 @@ enum netevent_notif_type {
NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */
NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */
NETEVENT_IPV4_MPATH_HASH_UPDATE, /* arg is struct net ptr */
+   NETEVENT_IPV6_MPATH_HASH_UPDATE, /* arg is struct net ptr */
 };
 
 int register_netevent_notifier(struct notifier_block *nb);
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 987cc4569cb8..6b0de3b71bbf 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -28,6 +28,7 @@ struct netns_sysctl_ipv6 {
int ip6_rt_gc_elasticity;
int ip6_rt_mtu_expires;
int ip6_rt_min_advmss;
+   int multipath_hash_policy;
int flowlabel_consistency;
int auto_flowlabels;
int icmpv6_time;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 6ae5dd3f4d0d..e255c9dd9b57 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -522,7 +522,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 
code, __u32 info,
fl6.fl6_icmp_type = type;
fl6.fl6_icmp_code = code;
fl6.flowi6_uid = sock_net_uid(net, NULL);
-   fl6.mp_hash = rt6_multipath_hash(, skb);
+   fl6.mp_hash = rt6_multipath_hash(net, , skb);
security_skb_classify_flow(skb, flowi6_to_flowi());
 
sk = icmpv6_xmit_lock(net);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 8d3a95eed662..1b559f78a896 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -450,7 +450,8 @@ static bool rt6_check_expired(const struct rt6_info *rt)
return false;
 }
 
-static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
+static struct rt6_info *rt6_multipath_select(const struct net *net,
+struct rt6_info *match,
 struct flowi6 *fl6, int oif,
 int strict)
 {
@@ -460,7 +461,7 @@ static struct rt6_info *rt6_multipath_select(struct 
rt6_info *match,
 * case it will always be non-zero. Otherwise now is the time to do it.
 */
if (!fl6->mp_hash)
-   fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
+   fl6->mp_hash = rt6_multipath_hash(net, fl6, NULL);
 
if (fl6->mp_hash <= atomic_read(>rt6i_nh_upper_bound))
return match;
@@ -929,7 +930,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct net 
*net,
rt = rt6_device_match(net, rt, >saddr,