Re: [PATCH net-next 2/3] ipv4: L3 and L4 hash-based multipath routing

2015-06-20 Thread Peter Nørlund
On Thu, 18 Jun 2015 15:52:22 -0700
Alexander Duyck alexander.h.du...@redhat.com wrote:

 
 
 On 06/17/2015 01:08 PM, Peter Nørlund wrote:
  This patch adds L3 and L4 hash-based multipath routing, selectable
  on a per-route basis with the reintroduced RTA_MP_ALGO attribute.
  The default is now RT_MP_ALG_L3_HASH.
 
  Signed-off-by: Peter Nørlund <p...@ordbogen.com>
  ---
include/net/ip_fib.h   |  4 ++-
include/net/route.h|  5 ++--
include/uapi/linux/rtnetlink.h | 14 ++-
net/ipv4/fib_frontend.c|  4 +++
net/ipv4/fib_semantics.c   | 34 ++---
net/ipv4/icmp.c|  4 +--
net/ipv4/route.c   | 56
  +++---
  net/ipv4/xfrm4_policy.c|  2 +- 8 files changed, 103
  insertions(+), 20 deletions(-)
 
  diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
  index 4be4f25..250d98e 100644
  --- a/include/net/ip_fib.h
  +++ b/include/net/ip_fib.h
  @@ -37,6 +37,7 @@ struct fib_config {
  u32 fc_flags;
  u32 fc_priority;
  __be32  fc_prefsrc;
  +   int fc_mp_alg;
  struct nlattr   *fc_mx;
  struct rtnexthop*fc_mp;
  int fc_mx_len;
  @@ -116,6 +117,7 @@ struct fib_info {
  int fib_nhs;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
  int fib_mp_weight;
  +   int fib_mp_alg;
#endif
  struct rcu_head rcu;
  struct fib_nh   fib_nh[0];
  @@ -308,7 +310,7 @@ int ip_fib_check_default(__be32 gw, struct
  net_device *dev); int fib_sync_down_dev(struct net_device *dev, int
  force); int fib_sync_down_addr(struct net *net, __be32 local);
int fib_sync_up(struct net_device *dev);
  -void fib_select_multipath(struct fib_result *res);
  +void fib_select_multipath(struct fib_result *res, const struct
  flowi4 *flow);
 
/* Exported by fib_trie.c */
void fib_trie_init(void);
  diff --git a/include/net/route.h b/include/net/route.h
  index fe22d03..1fc7deb 100644
  --- a/include/net/route.h
  +++ b/include/net/route.h
  @@ -110,7 +110,8 @@ struct in_device;
int ip_rt_init(void);
void rt_cache_flush(struct net *net);
void rt_flush_dev(struct net_device *dev);
  -struct rtable *__ip_route_output_key(struct net *, struct flowi4
  *flp); +struct rtable *__ip_route_output_key(struct net *, struct
  flowi4 *flp,
  +const struct flowi4 *mp_flow);
struct rtable *ip_route_output_flow(struct net *, struct flowi4
  *flp, struct sock *sk);
struct dst_entry *ipv4_blackhole_route(struct net *net,
  @@ -267,7 +268,7 @@ static inline struct rtable
  *ip_route_connect(struct flowi4 *fl4, sport, dport, sk);
 
  if (!dst || !src) {
  -   rt = __ip_route_output_key(net, fl4);
  +   rt = __ip_route_output_key(net, fl4, NULL);
  if (IS_ERR(rt))
  return rt;
  ip_rt_put(rt);
  diff --git a/include/uapi/linux/rtnetlink.h
  b/include/uapi/linux/rtnetlink.h index 17fb02f..dff4a72 100644
  --- a/include/uapi/linux/rtnetlink.h
  +++ b/include/uapi/linux/rtnetlink.h
  @@ -271,6 +271,18 @@ enum rt_scope_t {
#define RTM_F_EQUALIZE0x400   /* Multipath
  equalizer: NI   */ #define RTM_F_PREFIX
  0x800   /* Prefix addresses */
 
  +/* Multipath algorithms */
  +
  +enum rt_mp_alg_t {
  +   RT_MP_ALG_L3_HASH,  /* Was IP_MP_ALG_NONE */
  +   RT_MP_ALG_PER_PACKET,   /* Was IP_MP_ALG_RR */
  +   RT_MP_ALG_DRR,  /* not used */
  +   RT_MP_ALG_RANDOM,   /* not used */
  +   RT_MP_ALG_WRANDOM,  /* not used */
  +   RT_MP_ALG_L4_HASH,
  +   __RT_MP_ALG_MAX
  +};
  +
/* Reserved table identifiers */
 
enum rt_class_t {
  @@ -301,7 +313,7 @@ enum rtattr_type_t {
  RTA_FLOW,
  RTA_CACHEINFO,
  RTA_SESSION, /* no longer used */
  -   RTA_MP_ALGO, /* no longer used */
  +   RTA_MP_ALGO,
  RTA_TABLE,
  RTA_MARK,
  RTA_MFC_STATS,
  diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
  index 872494e..376e8c1 100644
  --- a/net/ipv4/fib_frontend.c
  +++ b/net/ipv4/fib_frontend.c
  @@ -590,6 +590,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX
  + 1] = { [RTA_PREFSRC]  = { .type = NLA_U32 },
  [RTA_METRICS]   = { .type = NLA_NESTED },
  [RTA_MULTIPATH] = { .len = sizeof(struct
  rtnexthop) },
  +   [RTA_MP_ALGO]   = { .type = NLA_U32 },
  [RTA_FLOW]  = { .type = NLA_U32 },
};
 
  @@ -650,6 +651,9 @@ static int rtm_to_fib_config(struct net *net,
  struct sk_buff *skb, cfg->fc_mp = nla_data(attr);
  cfg->fc_mp_len = nla_len(attr);
  break;
  +   case RTA_MP_ALGO:
  +   cfg->fc_mp_alg = nla_get_u32(attr);
  +   break;
  case RTA_FLOW:
 

Re: [PATCH net-next 2/3] ipv4: L3 and L4 hash-based multipath routing

2015-06-18 Thread Alexander Duyck



On 06/17/2015 01:08 PM, Peter Nørlund wrote:

This patch adds L3 and L4 hash-based multipath routing, selectable on a
per-route basis with the reintroduced RTA_MP_ALGO attribute. The default is
now RT_MP_ALG_L3_HASH.

Signed-off-by: Peter Nørlund <p...@ordbogen.com>
---
  include/net/ip_fib.h   |  4 ++-
  include/net/route.h|  5 ++--
  include/uapi/linux/rtnetlink.h | 14 ++-
  net/ipv4/fib_frontend.c|  4 +++
  net/ipv4/fib_semantics.c   | 34 ++---
  net/ipv4/icmp.c|  4 +--
  net/ipv4/route.c   | 56 +++---
  net/ipv4/xfrm4_policy.c|  2 +-
  8 files changed, 103 insertions(+), 20 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 4be4f25..250d98e 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -37,6 +37,7 @@ struct fib_config {
u32 fc_flags;
u32 fc_priority;
__be32  fc_prefsrc;
+   int fc_mp_alg;
struct nlattr   *fc_mx;
struct rtnexthop*fc_mp;
int fc_mx_len;
@@ -116,6 +117,7 @@ struct fib_info {
int fib_nhs;
  #ifdef CONFIG_IP_ROUTE_MULTIPATH
int fib_mp_weight;
+   int fib_mp_alg;
  #endif
struct rcu_head rcu;
struct fib_nh   fib_nh[0];
@@ -308,7 +310,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev);
  int fib_sync_down_dev(struct net_device *dev, int force);
  int fib_sync_down_addr(struct net *net, __be32 local);
  int fib_sync_up(struct net_device *dev);
-void fib_select_multipath(struct fib_result *res);
+void fib_select_multipath(struct fib_result *res, const struct flowi4 *flow);

  /* Exported by fib_trie.c */
  void fib_trie_init(void);
diff --git a/include/net/route.h b/include/net/route.h
index fe22d03..1fc7deb 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -110,7 +110,8 @@ struct in_device;
  int ip_rt_init(void);
  void rt_cache_flush(struct net *net);
  void rt_flush_dev(struct net_device *dev);
-struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp);
+struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp,
+const struct flowi4 *mp_flow);
  struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp,
struct sock *sk);
  struct dst_entry *ipv4_blackhole_route(struct net *net,
@@ -267,7 +268,7 @@ static inline struct rtable *ip_route_connect(struct flowi4 
*fl4,
  sport, dport, sk);

if (!dst || !src) {
-   rt = __ip_route_output_key(net, fl4);
+   rt = __ip_route_output_key(net, fl4, NULL);
if (IS_ERR(rt))
return rt;
ip_rt_put(rt);
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 17fb02f..dff4a72 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -271,6 +271,18 @@ enum rt_scope_t {
  #define RTM_F_EQUALIZE0x400   /* Multipath equalizer: NI  
*/
  #define RTM_F_PREFIX  0x800   /* Prefix addresses */

+/* Multipath algorithms */
+
+enum rt_mp_alg_t {
+   RT_MP_ALG_L3_HASH,  /* Was IP_MP_ALG_NONE */
+   RT_MP_ALG_PER_PACKET,   /* Was IP_MP_ALG_RR */
+   RT_MP_ALG_DRR,  /* not used */
+   RT_MP_ALG_RANDOM,   /* not used */
+   RT_MP_ALG_WRANDOM,  /* not used */
+   RT_MP_ALG_L4_HASH,
+   __RT_MP_ALG_MAX
+};
+
  /* Reserved table identifiers */

  enum rt_class_t {
@@ -301,7 +313,7 @@ enum rtattr_type_t {
RTA_FLOW,
RTA_CACHEINFO,
RTA_SESSION, /* no longer used */
-   RTA_MP_ALGO, /* no longer used */
+   RTA_MP_ALGO,
RTA_TABLE,
RTA_MARK,
RTA_MFC_STATS,
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 872494e..376e8c1 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -590,6 +590,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
[RTA_PREFSRC]   = { .type = NLA_U32 },
[RTA_METRICS]   = { .type = NLA_NESTED },
[RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
+   [RTA_MP_ALGO]   = { .type = NLA_U32 },
[RTA_FLOW]  = { .type = NLA_U32 },
  };

@@ -650,6 +651,9 @@ static int rtm_to_fib_config(struct net *net, struct 
sk_buff *skb,
cfg->fc_mp = nla_data(attr);
cfg->fc_mp_len = nla_len(attr);
break;
+   case RTA_MP_ALGO:
+   cfg->fc_mp_alg = nla_get_u32(attr);
+   break;
case RTA_FLOW:
cfg->fc_flow = 

[PATCH net-next 2/3] ipv4: L3 and L4 hash-based multipath routing

2015-06-17 Thread Peter Nørlund
This patch adds L3 and L4 hash-based multipath routing, selectable on a
per-route basis with the reintroduced RTA_MP_ALGO attribute. The default is
now RT_MP_ALG_L3_HASH.

Signed-off-by: Peter Nørlund <p...@ordbogen.com>
---
 include/net/ip_fib.h   |  4 ++-
 include/net/route.h|  5 ++--
 include/uapi/linux/rtnetlink.h | 14 ++-
 net/ipv4/fib_frontend.c|  4 +++
 net/ipv4/fib_semantics.c   | 34 ++---
 net/ipv4/icmp.c|  4 +--
 net/ipv4/route.c   | 56 +++---
 net/ipv4/xfrm4_policy.c|  2 +-
 8 files changed, 103 insertions(+), 20 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 4be4f25..250d98e 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -37,6 +37,7 @@ struct fib_config {
u32 fc_flags;
u32 fc_priority;
__be32  fc_prefsrc;
+   int fc_mp_alg;
struct nlattr   *fc_mx;
struct rtnexthop*fc_mp;
int fc_mx_len;
@@ -116,6 +117,7 @@ struct fib_info {
int fib_nhs;
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
int fib_mp_weight;
+   int fib_mp_alg;
 #endif
struct rcu_head rcu;
struct fib_nh   fib_nh[0];
@@ -308,7 +310,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev);
 int fib_sync_down_dev(struct net_device *dev, int force);
 int fib_sync_down_addr(struct net *net, __be32 local);
 int fib_sync_up(struct net_device *dev);
-void fib_select_multipath(struct fib_result *res);
+void fib_select_multipath(struct fib_result *res, const struct flowi4 *flow);
 
 /* Exported by fib_trie.c */
 void fib_trie_init(void);
diff --git a/include/net/route.h b/include/net/route.h
index fe22d03..1fc7deb 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -110,7 +110,8 @@ struct in_device;
 int ip_rt_init(void);
 void rt_cache_flush(struct net *net);
 void rt_flush_dev(struct net_device *dev);
-struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp);
+struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp,
+const struct flowi4 *mp_flow);
 struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp,
struct sock *sk);
 struct dst_entry *ipv4_blackhole_route(struct net *net,
@@ -267,7 +268,7 @@ static inline struct rtable *ip_route_connect(struct flowi4 
*fl4,
  sport, dport, sk);
 
if (!dst || !src) {
-   rt = __ip_route_output_key(net, fl4);
+   rt = __ip_route_output_key(net, fl4, NULL);
if (IS_ERR(rt))
return rt;
ip_rt_put(rt);
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 17fb02f..dff4a72 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -271,6 +271,18 @@ enum rt_scope_t {
 #define RTM_F_EQUALIZE 0x400   /* Multipath equalizer: NI  */
 #define RTM_F_PREFIX   0x800   /* Prefix addresses */
 
+/* Multipath algorithms */
+
+enum rt_mp_alg_t {
+   RT_MP_ALG_L3_HASH,  /* Was IP_MP_ALG_NONE */
+   RT_MP_ALG_PER_PACKET,   /* Was IP_MP_ALG_RR */
+   RT_MP_ALG_DRR,  /* not used */
+   RT_MP_ALG_RANDOM,   /* not used */
+   RT_MP_ALG_WRANDOM,  /* not used */
+   RT_MP_ALG_L4_HASH,
+   __RT_MP_ALG_MAX
+};
+
 /* Reserved table identifiers */
 
 enum rt_class_t {
@@ -301,7 +313,7 @@ enum rtattr_type_t {
RTA_FLOW,
RTA_CACHEINFO,
RTA_SESSION, /* no longer used */
-   RTA_MP_ALGO, /* no longer used */
+   RTA_MP_ALGO,
RTA_TABLE,
RTA_MARK,
RTA_MFC_STATS,
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 872494e..376e8c1 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -590,6 +590,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
[RTA_PREFSRC]   = { .type = NLA_U32 },
[RTA_METRICS]   = { .type = NLA_NESTED },
[RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
+   [RTA_MP_ALGO]   = { .type = NLA_U32 },
[RTA_FLOW]  = { .type = NLA_U32 },
 };
 
@@ -650,6 +651,9 @@ static int rtm_to_fib_config(struct net *net, struct 
sk_buff *skb,
cfg->fc_mp = nla_data(attr);
cfg->fc_mp_len = nla_len(attr);
break;
+   case RTA_MP_ALGO:
+   cfg->fc_mp_alg = nla_get_u32(attr);
+   break;
case RTA_FLOW:
cfg->fc_flow = nla_get_u32(attr);
break;
diff --git a/net/ipv4/fib_semantics.c