Re: [PATCH net-next 1/3] ipv4: Lock-less per-packet multipath
On Thu, 18 Jun 2015 12:42:05 -0700 Alexander Duyck wrote: > On 06/17/2015 01:08 PM, Peter Nørlund wrote: > > The current multipath attempted to be quasi random, but in most > > cases it behaved just like a round robin balancing. This patch > > refactors the algorithm to be exactly that and in doing so, avoids > > the spin lock. > > > > The new design paves the way for hash-based multipath, replacing the > > modulo with thresholds, minimizing disruption in case of failing > > paths or route replacements. > > > > Signed-off-by: Peter Nørlund > > --- > > include/net/ip_fib.h | 6 +-- > > net/ipv4/Kconfig | 1 + > > net/ipv4/fib_semantics.c | 116 > > ++- 3 files changed, 68 > > insertions(+), 55 deletions(-) > > > > diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h > > index 54271ed..4be4f25 100644 > > --- a/include/net/ip_fib.h > > +++ b/include/net/ip_fib.h > > @@ -76,8 +76,8 @@ struct fib_nh { > > unsigned intnh_flags; > > unsigned char nh_scope; > > #ifdef CONFIG_IP_ROUTE_MULTIPATH > > - int nh_weight; > > - int nh_power; > > + int nh_mp_weight; > > + atomic_tnh_mp_upper_bound; > > #endif > > #ifdef CONFIG_IP_ROUTE_CLASSID > > __u32 nh_tclassid; > > @@ -115,7 +115,7 @@ struct fib_info { > > #define fib_advmss fib_metrics[RTAX_ADVMSS-1] > > int fib_nhs; > > #ifdef CONFIG_IP_ROUTE_MULTIPATH > > - int fib_power; > > + int fib_mp_weight; > > #endif > > struct rcu_head rcu; > > struct fib_nh fib_nh[0]; > > I could do without some of this renaming. For example you could > probably not bother with adding the _mp piece to the name. That way > we don't have to track all the nh_weight -> nh_mp_weight changes. > Also you could probably just use the name fib_weight since not > including the _mp was already the convention for the multipath > portions of the structure anyway. > > This isn't really improving readability at all so I would say don't > bother renaming it. > Good point. I'll skip the renaming. > > diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig > > index d83071d..cb91f67 100644 > > --- a/net/ipv4/Kconfig > > +++ b/net/ipv4/Kconfig > > @@ -81,6 +81,7 @@ config IP_MULTIPLE_TABLES > > config IP_ROUTE_MULTIPATH > > bool "IP: equal cost multipath" > > depends on IP_ADVANCED_ROUTER > > + select BITREVERSE > > help > > Normally, the routing tables specify a single action to > > be taken in a deterministic manner for a given packet. If you say Y > > here diff --git a/net/ipv4/fib_semantics.c > > b/net/ipv4/fib_semantics.c index 28ec3c1..8c8df80 100644 > > --- a/net/ipv4/fib_semantics.c > > +++ b/net/ipv4/fib_semantics.c > > @@ -15,6 +15,7 @@ > > > > #include > > #include > > +#include > > #include > > #include > > #include > > @@ -57,7 +58,7 @@ static struct hlist_head > > fib_info_devhash[DEVINDEX_HASHSIZE]; > > > > #ifdef CONFIG_IP_ROUTE_MULTIPATH > > > > -static DEFINE_SPINLOCK(fib_multipath_lock); > > +static DEFINE_PER_CPU(u8, fib_mp_rr_counter); > > > > #define for_nexthops(fi) > > { \ int nhsel; const > > struct fib_nh *nh; \ @@ -261,7 > > +262,7 @@ static inline int nh_comp(const struct fib_info *fi, > > const struct fib_info *ofi) nh->nh_gw != onh->nh_gw || > > nh->nh_scope != onh->nh_scope || #ifdef CONFIG_IP_ROUTE_MULTIPATH > > - nh->nh_weight != onh->nh_weight || > > + nh->nh_mp_weight != onh->nh_mp_weight || > > #endif > > #ifdef CONFIG_IP_ROUTE_CLASSID > > nh->nh_tclassid != onh->nh_tclassid || > > @@ -449,6 +450,43 @@ static int fib_count_nexthops(struct rtnexthop > > *rtnh, int remaining) return remaining > 0 ? 0 : nhs; > > } > > > > This is a good example. If we don't do the rename we don't have to > review changes like the one above which just add extra overhead to > the patch. > Right. > > +static void fib_rebalance(struct fib_info *fi) > > +{ > > + int factor; > > + int total; > > + int w; > > + > > + if (fi->fib_nhs < 2) > > + return; > > + > > + total = 0; > > + for_nexthops(fi) { > > + if (!(nh->nh_flags & RTNH_F_DEAD)) > > + total += nh->nh_mp_weight; > > + } endfor_nexthops(fi); > > + > > + if (likely(total != 0)) { > > + factor = DIV_ROUND_UP(total, 8388608); > > + total /= factor; > > + } else { > > + factor = 1; > > + } > > + > > So where does the 8388608 value come from? Is it just here to help > restrict the upper_bound to a u8 value? > Yes. Although I think it is rare for the weight to be that large, the API supports it, so I'd better make sure nothing weird happens. Or is it too hypothetical? Today, if one were
Re: [PATCH net-next 1/3] ipv4: Lock-less per-packet multipath
On 06/17/2015 01:08 PM, Peter Nørlund wrote: The current multipath attempted to be quasi random, but in most cases it behaved just like a round robin balancing. This patch refactors the algorithm to be exactly that and in doing so, avoids the spin lock. The new design paves the way for hash-based multipath, replacing the modulo with thresholds, minimizing disruption in case of failing paths or route replacements. Signed-off-by: Peter Nørlund --- include/net/ip_fib.h | 6 +-- net/ipv4/Kconfig | 1 + net/ipv4/fib_semantics.c | 116 ++- 3 files changed, 68 insertions(+), 55 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 54271ed..4be4f25 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -76,8 +76,8 @@ struct fib_nh { unsigned intnh_flags; unsigned char nh_scope; #ifdef CONFIG_IP_ROUTE_MULTIPATH - int nh_weight; - int nh_power; + int nh_mp_weight; + atomic_tnh_mp_upper_bound; #endif #ifdef CONFIG_IP_ROUTE_CLASSID __u32 nh_tclassid; @@ -115,7 +115,7 @@ struct fib_info { #define fib_advmss fib_metrics[RTAX_ADVMSS-1] int fib_nhs; #ifdef CONFIG_IP_ROUTE_MULTIPATH - int fib_power; + int fib_mp_weight; #endif struct rcu_head rcu; struct fib_nh fib_nh[0]; I could do without some of this renaming. For example you could probably not bother with adding the _mp piece to the name. That way we don't have to track all the nh_weight -> nh_mp_weight changes. Also you could probably just use the name fib_weight since not including the _mp was already the convention for the multipath portions of the structure anyway. This isn't really improving readability at all so I would say don't bother renaming it. diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index d83071d..cb91f67 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -81,6 +81,7 @@ config IP_MULTIPLE_TABLES config IP_ROUTE_MULTIPATH bool "IP: equal cost multipath" depends on IP_ADVANCED_ROUTER + select BITREVERSE help Normally, the routing tables specify a single action to be taken in a deterministic manner for a given packet. If you say Y here diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 28ec3c1..8c8df80 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -57,7 +58,7 @@ static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; #ifdef CONFIG_IP_ROUTE_MULTIPATH -static DEFINE_SPINLOCK(fib_multipath_lock); +static DEFINE_PER_CPU(u8, fib_mp_rr_counter); #define for_nexthops(fi) {\ int nhsel; const struct fib_nh *nh; \ @@ -261,7 +262,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) nh->nh_gw != onh->nh_gw || nh->nh_scope != onh->nh_scope || #ifdef CONFIG_IP_ROUTE_MULTIPATH - nh->nh_weight != onh->nh_weight || + nh->nh_mp_weight != onh->nh_mp_weight || #endif #ifdef CONFIG_IP_ROUTE_CLASSID nh->nh_tclassid != onh->nh_tclassid || @@ -449,6 +450,43 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining) return remaining > 0 ? 0 : nhs; } This is a good example. If we don't do the rename we don't have to review changes like the one above which just add extra overhead to the patch. +static void fib_rebalance(struct fib_info *fi) +{ + int factor; + int total; + int w; + + if (fi->fib_nhs < 2) + return; + + total = 0; + for_nexthops(fi) { + if (!(nh->nh_flags & RTNH_F_DEAD)) + total += nh->nh_mp_weight; + } endfor_nexthops(fi); + + if (likely(total != 0)) { + factor = DIV_ROUND_UP(total, 8388608); + total /= factor; + } else { + factor = 1; + } + So where does the 8388608 value come from? Is it just here to help restrict the upper_bound to a u8 value? + w = 0; + change_nexthops(fi) { + int upper_bound; + + if (nexthop_nh->nh_flags & RTNH_F_DEAD) { + upper_bound = -1; + } else { + w += nexthop_nh->nh_mp_weight / factor; + upper_bound = DIV_ROUND_CLOSEST(256 * w, total); + } This is doing some confusing stuff. I assume the whole point is to get the value to convert the upper_bound into a u8 value based on the weight where you end
[PATCH net-next 1/3] ipv4: Lock-less per-packet multipath
The current multipath attempted to be quasi random, but in most cases it behaved just like a round robin balancing. This patch refactors the algorithm to be exactly that and in doing so, avoids the spin lock. The new design paves the way for hash-based multipath, replacing the modulo with thresholds, minimizing disruption in case of failing paths or route replacements. Signed-off-by: Peter Nørlund --- include/net/ip_fib.h | 6 +-- net/ipv4/Kconfig | 1 + net/ipv4/fib_semantics.c | 116 ++- 3 files changed, 68 insertions(+), 55 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 54271ed..4be4f25 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -76,8 +76,8 @@ struct fib_nh { unsigned intnh_flags; unsigned char nh_scope; #ifdef CONFIG_IP_ROUTE_MULTIPATH - int nh_weight; - int nh_power; + int nh_mp_weight; + atomic_tnh_mp_upper_bound; #endif #ifdef CONFIG_IP_ROUTE_CLASSID __u32 nh_tclassid; @@ -115,7 +115,7 @@ struct fib_info { #define fib_advmss fib_metrics[RTAX_ADVMSS-1] int fib_nhs; #ifdef CONFIG_IP_ROUTE_MULTIPATH - int fib_power; + int fib_mp_weight; #endif struct rcu_head rcu; struct fib_nh fib_nh[0]; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index d83071d..cb91f67 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -81,6 +81,7 @@ config IP_MULTIPLE_TABLES config IP_ROUTE_MULTIPATH bool "IP: equal cost multipath" depends on IP_ADVANCED_ROUTER + select BITREVERSE help Normally, the routing tables specify a single action to be taken in a deterministic manner for a given packet. If you say Y here diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 28ec3c1..8c8df80 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -57,7 +58,7 @@ static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; #ifdef CONFIG_IP_ROUTE_MULTIPATH -static DEFINE_SPINLOCK(fib_multipath_lock); +static DEFINE_PER_CPU(u8, fib_mp_rr_counter); #define for_nexthops(fi) { \ int nhsel; const struct fib_nh *nh; \ @@ -261,7 +262,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) nh->nh_gw != onh->nh_gw || nh->nh_scope != onh->nh_scope || #ifdef CONFIG_IP_ROUTE_MULTIPATH - nh->nh_weight != onh->nh_weight || + nh->nh_mp_weight != onh->nh_mp_weight || #endif #ifdef CONFIG_IP_ROUTE_CLASSID nh->nh_tclassid != onh->nh_tclassid || @@ -449,6 +450,43 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining) return remaining > 0 ? 0 : nhs; } +static void fib_rebalance(struct fib_info *fi) +{ + int factor; + int total; + int w; + + if (fi->fib_nhs < 2) + return; + + total = 0; + for_nexthops(fi) { + if (!(nh->nh_flags & RTNH_F_DEAD)) + total += nh->nh_mp_weight; + } endfor_nexthops(fi); + + if (likely(total != 0)) { + factor = DIV_ROUND_UP(total, 8388608); + total /= factor; + } else { + factor = 1; + } + + w = 0; + change_nexthops(fi) { + int upper_bound; + + if (nexthop_nh->nh_flags & RTNH_F_DEAD) { + upper_bound = -1; + } else { + w += nexthop_nh->nh_mp_weight / factor; + upper_bound = DIV_ROUND_CLOSEST(256 * w, total); + } + + atomic_set(&nexthop_nh->nh_mp_upper_bound, upper_bound); + } endfor_nexthops(fi); +} + static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, int remaining, struct fib_config *cfg) { @@ -461,7 +499,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, nexthop_nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; nexthop_nh->nh_oif = rtnh->rtnh_ifindex; - nexthop_nh->nh_weight = rtnh->rtnh_hops + 1; + nexthop_nh->nh_mp_weight = rtnh->rtnh_hops + 1; attrlen = rtnh_attrlen(rtnh); if (attrlen > 0) { @@ -884,7 +922,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) fi->fib_net->ipv4.fib_num_tclassid_users++; #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH - nh->nh_weight = 1; + nh->nh_mp_w