Re: [PATCH net-next 2/3] ipv4: L3 and L4 hash-based multipath routing
On Thu, 18 Jun 2015 15:52:22 -0700 Alexander Duyck alexander.h.du...@redhat.com wrote: On 06/17/2015 01:08 PM, Peter Nørlund wrote: This patch adds L3 and L4 hash-based multipath routing, selectable on a per-route basis with the reintroduced RTA_MP_ALGO attribute. The default is now RT_MP_ALG_L3_HASH. Signed-off-by: Peter Nørlund p...@ordbogen.com --- include/net/ip_fib.h | 4 ++- include/net/route.h| 5 ++-- include/uapi/linux/rtnetlink.h | 14 ++- net/ipv4/fib_frontend.c| 4 +++ net/ipv4/fib_semantics.c | 34 ++--- net/ipv4/icmp.c| 4 +-- net/ipv4/route.c | 56 +++--- net/ipv4/xfrm4_policy.c| 2 +- 8 files changed, 103 insertions(+), 20 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 4be4f25..250d98e 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -37,6 +37,7 @@ struct fib_config { u32 fc_flags; u32 fc_priority; __be32 fc_prefsrc; + int fc_mp_alg; struct nlattr *fc_mx; struct rtnexthop*fc_mp; int fc_mx_len; @@ -116,6 +117,7 @@ struct fib_info { int fib_nhs; #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_mp_weight; + int fib_mp_alg; #endif struct rcu_head rcu; struct fib_nh fib_nh[0]; @@ -308,7 +310,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev); int fib_sync_down_dev(struct net_device *dev, int force); int fib_sync_down_addr(struct net *net, __be32 local); int fib_sync_up(struct net_device *dev); -void fib_select_multipath(struct fib_result *res); +void fib_select_multipath(struct fib_result *res, const struct flowi4 *flow); /* Exported by fib_trie.c */ void fib_trie_init(void); diff --git a/include/net/route.h b/include/net/route.h index fe22d03..1fc7deb 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -110,7 +110,8 @@ struct in_device; int ip_rt_init(void); void rt_cache_flush(struct net *net); void rt_flush_dev(struct net_device *dev); -struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp); +struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp, +const struct flowi4 
*mp_flow); struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, struct sock *sk); struct dst_entry *ipv4_blackhole_route(struct net *net, @@ -267,7 +268,7 @@ static inline struct rtable *ip_route_connect(struct flowi4 *fl4, sport, dport, sk); if (!dst || !src) { - rt = __ip_route_output_key(net, fl4); + rt = __ip_route_output_key(net, fl4, NULL); if (IS_ERR(rt)) return rt; ip_rt_put(rt); diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 17fb02f..dff4a72 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -271,6 +271,18 @@ enum rt_scope_t { #define RTM_F_EQUALIZE 0x400 /* Multipath equalizer: NI */ #define RTM_F_PREFIX 0x800 /* Prefix addresses */ +/* Multipath algorithms */ + +enum rt_mp_alg_t { + RT_MP_ALG_L3_HASH, /* Was IP_MP_ALG_NONE */ + RT_MP_ALG_PER_PACKET, /* Was IP_MP_ALG_RR */ + RT_MP_ALG_DRR, /* not used */ + RT_MP_ALG_RANDOM, /* not used */ + RT_MP_ALG_WRANDOM, /* not used */ + RT_MP_ALG_L4_HASH, + __RT_MP_ALG_MAX +}; + /* Reserved table identifiers */ enum rt_class_t { @@ -301,7 +313,7 @@ enum rtattr_type_t { RTA_FLOW, RTA_CACHEINFO, RTA_SESSION, /* no longer used */ - RTA_MP_ALGO, /* no longer used */ + RTA_MP_ALGO, RTA_TABLE, RTA_MARK, RTA_MFC_STATS, diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 872494e..376e8c1 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -590,6 +590,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { [RTA_PREFSRC] = { .type = NLA_U32 }, [RTA_METRICS] = { .type = NLA_NESTED }, [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, + [RTA_MP_ALGO] = { .type = NLA_U32 }, [RTA_FLOW] = { .type = NLA_U32 }, }; @@ -650,6 +651,9 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, cfg->fc_mp = nla_data(attr); cfg->fc_mp_len = nla_len(attr); break; + case RTA_MP_ALGO: + cfg->fc_mp_alg = nla_get_u32(attr); + break; case RTA_FLOW:
Re: [PATCH net-next 2/3] ipv4: L3 and L4 hash-based multipath routing
On 06/17/2015 01:08 PM, Peter Nørlund wrote: This patch adds L3 and L4 hash-based multipath routing, selectable on a per-route basis with the reintroduced RTA_MP_ALGO attribute. The default is now RT_MP_ALG_L3_HASH. Signed-off-by: Peter Nørlund p...@ordbogen.com --- include/net/ip_fib.h | 4 ++- include/net/route.h| 5 ++-- include/uapi/linux/rtnetlink.h | 14 ++- net/ipv4/fib_frontend.c| 4 +++ net/ipv4/fib_semantics.c | 34 ++--- net/ipv4/icmp.c| 4 +-- net/ipv4/route.c | 56 +++--- net/ipv4/xfrm4_policy.c| 2 +- 8 files changed, 103 insertions(+), 20 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 4be4f25..250d98e 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -37,6 +37,7 @@ struct fib_config { u32 fc_flags; u32 fc_priority; __be32 fc_prefsrc; + int fc_mp_alg; struct nlattr *fc_mx; struct rtnexthop*fc_mp; int fc_mx_len; @@ -116,6 +117,7 @@ struct fib_info { int fib_nhs; #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_mp_weight; + int fib_mp_alg; #endif struct rcu_head rcu; struct fib_nh fib_nh[0]; @@ -308,7 +310,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev); int fib_sync_down_dev(struct net_device *dev, int force); int fib_sync_down_addr(struct net *net, __be32 local); int fib_sync_up(struct net_device *dev); -void fib_select_multipath(struct fib_result *res); +void fib_select_multipath(struct fib_result *res, const struct flowi4 *flow); /* Exported by fib_trie.c */ void fib_trie_init(void); diff --git a/include/net/route.h b/include/net/route.h index fe22d03..1fc7deb 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -110,7 +110,8 @@ struct in_device; int ip_rt_init(void); void rt_cache_flush(struct net *net); void rt_flush_dev(struct net_device *dev); -struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp); +struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp, +const struct flowi4 *mp_flow); struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, struct 
sock *sk); struct dst_entry *ipv4_blackhole_route(struct net *net, @@ -267,7 +268,7 @@ static inline struct rtable *ip_route_connect(struct flowi4 *fl4, sport, dport, sk); if (!dst || !src) { - rt = __ip_route_output_key(net, fl4); + rt = __ip_route_output_key(net, fl4, NULL); if (IS_ERR(rt)) return rt; ip_rt_put(rt); diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 17fb02f..dff4a72 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -271,6 +271,18 @@ enum rt_scope_t { #define RTM_F_EQUALIZE 0x400 /* Multipath equalizer: NI */ #define RTM_F_PREFIX 0x800 /* Prefix addresses */ +/* Multipath algorithms */ + +enum rt_mp_alg_t { + RT_MP_ALG_L3_HASH, /* Was IP_MP_ALG_NONE */ + RT_MP_ALG_PER_PACKET, /* Was IP_MP_ALG_RR */ + RT_MP_ALG_DRR, /* not used */ + RT_MP_ALG_RANDOM, /* not used */ + RT_MP_ALG_WRANDOM, /* not used */ + RT_MP_ALG_L4_HASH, + __RT_MP_ALG_MAX +}; + /* Reserved table identifiers */ enum rt_class_t { @@ -301,7 +313,7 @@ enum rtattr_type_t { RTA_FLOW, RTA_CACHEINFO, RTA_SESSION, /* no longer used */ - RTA_MP_ALGO, /* no longer used */ + RTA_MP_ALGO, RTA_TABLE, RTA_MARK, RTA_MFC_STATS, diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 872494e..376e8c1 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -590,6 +590,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { [RTA_PREFSRC] = { .type = NLA_U32 }, [RTA_METRICS] = { .type = NLA_NESTED }, [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, + [RTA_MP_ALGO] = { .type = NLA_U32 }, [RTA_FLOW] = { .type = NLA_U32 }, }; @@ -650,6 +651,9 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, cfg->fc_mp = nla_data(attr); cfg->fc_mp_len = nla_len(attr); break; + case RTA_MP_ALGO: + cfg->fc_mp_alg = nla_get_u32(attr); + break; case RTA_FLOW: cfg->fc_flow =
[PATCH net-next 2/3] ipv4: L3 and L4 hash-based multipath routing
This patch adds L3 and L4 hash-based multipath routing, selectable on a per-route basis with the reintroduced RTA_MP_ALGO attribute. The default is now RT_MP_ALG_L3_HASH. Signed-off-by: Peter Nørlund p...@ordbogen.com --- include/net/ip_fib.h | 4 ++- include/net/route.h| 5 ++-- include/uapi/linux/rtnetlink.h | 14 ++- net/ipv4/fib_frontend.c| 4 +++ net/ipv4/fib_semantics.c | 34 ++--- net/ipv4/icmp.c| 4 +-- net/ipv4/route.c | 56 +++--- net/ipv4/xfrm4_policy.c| 2 +- 8 files changed, 103 insertions(+), 20 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 4be4f25..250d98e 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -37,6 +37,7 @@ struct fib_config { u32 fc_flags; u32 fc_priority; __be32 fc_prefsrc; + int fc_mp_alg; struct nlattr *fc_mx; struct rtnexthop*fc_mp; int fc_mx_len; @@ -116,6 +117,7 @@ struct fib_info { int fib_nhs; #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_mp_weight; + int fib_mp_alg; #endif struct rcu_head rcu; struct fib_nh fib_nh[0]; @@ -308,7 +310,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev); int fib_sync_down_dev(struct net_device *dev, int force); int fib_sync_down_addr(struct net *net, __be32 local); int fib_sync_up(struct net_device *dev); -void fib_select_multipath(struct fib_result *res); +void fib_select_multipath(struct fib_result *res, const struct flowi4 *flow); /* Exported by fib_trie.c */ void fib_trie_init(void); diff --git a/include/net/route.h b/include/net/route.h index fe22d03..1fc7deb 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -110,7 +110,8 @@ struct in_device; int ip_rt_init(void); void rt_cache_flush(struct net *net); void rt_flush_dev(struct net_device *dev); -struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp); +struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp, +const struct flowi4 *mp_flow); struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, struct sock *sk); struct dst_entry 
*ipv4_blackhole_route(struct net *net, @@ -267,7 +268,7 @@ static inline struct rtable *ip_route_connect(struct flowi4 *fl4, sport, dport, sk); if (!dst || !src) { - rt = __ip_route_output_key(net, fl4); + rt = __ip_route_output_key(net, fl4, NULL); if (IS_ERR(rt)) return rt; ip_rt_put(rt); diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 17fb02f..dff4a72 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -271,6 +271,18 @@ enum rt_scope_t { #define RTM_F_EQUALIZE 0x400 /* Multipath equalizer: NI */ #define RTM_F_PREFIX 0x800 /* Prefix addresses */ +/* Multipath algorithms */ + +enum rt_mp_alg_t { + RT_MP_ALG_L3_HASH, /* Was IP_MP_ALG_NONE */ + RT_MP_ALG_PER_PACKET, /* Was IP_MP_ALG_RR */ + RT_MP_ALG_DRR, /* not used */ + RT_MP_ALG_RANDOM, /* not used */ + RT_MP_ALG_WRANDOM, /* not used */ + RT_MP_ALG_L4_HASH, + __RT_MP_ALG_MAX +}; + /* Reserved table identifiers */ enum rt_class_t { @@ -301,7 +313,7 @@ enum rtattr_type_t { RTA_FLOW, RTA_CACHEINFO, RTA_SESSION, /* no longer used */ - RTA_MP_ALGO, /* no longer used */ + RTA_MP_ALGO, RTA_TABLE, RTA_MARK, RTA_MFC_STATS, diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 872494e..376e8c1 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -590,6 +590,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { [RTA_PREFSRC] = { .type = NLA_U32 }, [RTA_METRICS] = { .type = NLA_NESTED }, [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, + [RTA_MP_ALGO] = { .type = NLA_U32 }, [RTA_FLOW] = { .type = NLA_U32 }, }; @@ -650,6 +651,9 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, cfg->fc_mp = nla_data(attr); cfg->fc_mp_len = nla_len(attr); break; + case RTA_MP_ALGO: + cfg->fc_mp_alg = nla_get_u32(attr); + break; case RTA_FLOW: cfg->fc_flow = nla_get_u32(attr); break; diff --git a/net/ipv4/fib_semantics.c