ChangeSet 1.2217.1.23, 2005/03/17 20:00:13-08:00, [EMAIL PROTECTED]
[IPV4]: Make multipath algs into true drivers.
This also makes them configurable on a per-route
basis via rtnetlink route attributes.
Based upon suggestions from Thomas Graf and Alexey
Kuznetsov.
Signed-off-by: David S. Miller <[EMAIL PROTECTED]>
include/linux/ip_mp_alg.h | 22 +++++
include/linux/rtnetlink.h | 1
include/net/ip_fib.h | 10 +-
include/net/ip_mp_alg.h | 79 +++++++++++++++++++-
include/net/route.h | 73 ------------------
net/ipv4/Kconfig | 26 ++----
net/ipv4/Makefile | 1
net/ipv4/fib_semantics.c | 19 ++++
net/ipv4/multipath.c | 54 +++++++++++++
net/ipv4/multipath_drr.c | 103 ++++++++++----------------
net/ipv4/multipath_random.c | 44 +++++++----
net/ipv4/multipath_rr.c | 34 +++++---
net/ipv4/multipath_wrandom.c | 109 +++++++++++----------------
net/ipv4/route.c | 169 +++++++++++++++++++------------------------
14 files changed, 402 insertions(+), 342 deletions(-)
diff -Nru a/include/linux/ip_mp_alg.h b/include/linux/ip_mp_alg.h
--- /dev/null Wed Dec 31 16:00:00 196900
+++ b/include/linux/ip_mp_alg.h 2005-03-18 14:08:05 -08:00
@@ -0,0 +1,22 @@
+/* ip_mp_alg.h: IPV4 multipath algorithm support, user-visible values.
+ *
+ * Copyright (C) 2004, 2005 Einar Lueck <[EMAIL PROTECTED]>
+ * Copyright (C) 2005 David S. Miller <[EMAIL PROTECTED]>
+ */
+
+#ifndef _LINUX_IP_MP_ALG_H
+#define _LINUX_IP_MP_ALG_H
+
+enum ip_mp_alg {
+ IP_MP_ALG_NONE, /* no algorithm selected */
+ IP_MP_ALG_RR, /* round robin */
+ IP_MP_ALG_DRR, /* interface round robin */
+ IP_MP_ALG_RANDOM, /* random */
+ IP_MP_ALG_WRANDOM, /* weighted random */
+ __IP_MP_ALG_MAX /* sentinel: one past the last valid value */
+};
+
+#define IP_MP_ALG_MAX (__IP_MP_ALG_MAX - 1)
+
+#endif /* _LINUX_IP_MP_ALG_H */
+
diff -Nru a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
--- a/include/linux/rtnetlink.h 2005-03-18 14:08:05 -08:00
+++ b/include/linux/rtnetlink.h 2005-03-18 14:08:05 -08:00
@@ -250,6 +250,7 @@
RTA_FLOW,
RTA_CACHEINFO,
RTA_SESSION,
+ RTA_MP_ALGO,
__RTA_MAX
};
diff -Nru a/include/net/ip_fib.h b/include/net/ip_fib.h
--- a/include/net/ip_fib.h 2005-03-18 14:08:05 -08:00
+++ b/include/net/ip_fib.h 2005-03-18 14:08:05 -08:00
@@ -37,6 +37,7 @@
u32 *rta_flow;
struct rta_cacheinfo *rta_ci;
struct rta_session *rta_sess;
+ u32 *rta_mp_alg;
};
struct fib_info;
@@ -81,6 +82,9 @@
#ifdef CONFIG_IP_ROUTE_MULTIPATH
int fib_power;
#endif
+#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+ u32 fib_mp_alg;
+#endif
struct fib_nh fib_nh[0];
#define fib_dev fib_nh[0].nh_dev
};
@@ -95,7 +99,7 @@
unsigned char nh_sel;
unsigned char type;
unsigned char scope;
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_WRANDOM
+#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
__u32 network;
__u32 netmask;
#endif
@@ -123,10 +127,10 @@
#define FIB_RES_DEV(res) (FIB_RES_NH(res).nh_dev)
#define FIB_RES_OIF(res) (FIB_RES_NH(res).nh_oif)
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_WRANDOM
+#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
#define FIB_RES_NETWORK(res) ((res).network)
#define FIB_RES_NETMASK(res) ((res).netmask)
-#else /* CONFIG_IP_ROUTE_MULTIPATH_WRANDOM */
+#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
#define FIB_RES_NETWORK(res) (0)
#define FIB_RES_NETMASK(res) (0)
#endif /* CONFIG_IP_ROUTE_MULTIPATH_WRANDOM */
diff -Nru a/include/net/ip_mp_alg.h b/include/net/ip_mp_alg.h
--- a/include/net/ip_mp_alg.h 2005-03-18 14:08:05 -08:00
+++ b/include/net/ip_mp_alg.h 2005-03-18 14:08:05 -08:00
@@ -4,13 +4,84 @@
* Copyright (C) 2005 David S. Miller <[EMAIL PROTECTED]>
*/
-#ifndef _IP_MP_ALG_H
-#define _IP_MP_ALG_H
+#ifndef _NET_IP_MP_ALG_H
+#define _NET_IP_MP_ALG_H
#include <linux/config.h>
+#include <linux/ip_mp_alg.h>
#include <net/flow.h>
+#include <net/route.h>
-static int inline multipath_comparekeys(const struct flowi *flp1,
+struct fib_nh;
+
+struct ip_mp_alg_ops {
+ void (*mp_alg_select_route)(const struct flowi *flp,
+ struct rtable *rth, struct rtable **rp);
+ void (*mp_alg_flush)(void);
+ void (*mp_alg_set_nhinfo)(__u32 network, __u32 netmask,
+ unsigned char prefixlen,
+ const struct fib_nh *nh);
+ void (*mp_alg_remove)(struct rtable *rth);
+};
+
+extern int multipath_alg_register(struct ip_mp_alg_ops *, enum ip_mp_alg);
+extern void multipath_alg_unregister(struct ip_mp_alg_ops *, enum ip_mp_alg);
+
+extern struct ip_mp_alg_ops *ip_mp_alg_table[];
+
+static inline int multipath_select_route(const struct flowi *flp,
+ struct rtable *rth,
+ struct rtable **rp)
+{
+#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+ struct ip_mp_alg_ops *ops = ip_mp_alg_table[rth->rt_multipath_alg];
+ /* Returns 1 after the route's registered driver has chosen *rp, else 0. */
+ if (ops && (rth->u.dst.flags & DST_BALANCED)) {
+ ops->mp_alg_select_route(flp, rth, rp);
+ return 1;
+ }
+#endif
+ return 0;
+}
+
+static inline void multipath_flush(void)
+{
+#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+ int i;
+
+ for (i = IP_MP_ALG_NONE; i <= IP_MP_ALG_MAX; i++) {
+ struct ip_mp_alg_ops *ops = ip_mp_alg_table[i];
+ /* The flush hook is optional: drivers may leave it NULL. */
+ if (ops && ops->mp_alg_flush)
+ ops->mp_alg_flush();
+ }
+#endif
+}
+
+static inline void multipath_set_nhinfo(struct rtable *rth,
+ __u32 network, __u32 netmask,
+ unsigned char prefixlen,
+ const struct fib_nh *nh)
+{
+#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+ struct ip_mp_alg_ops *ops = ip_mp_alg_table[rth->rt_multipath_alg];
+ /* The set_nhinfo hook is optional: drivers may leave it NULL. */
+ if (ops && ops->mp_alg_set_nhinfo)
+ ops->mp_alg_set_nhinfo(network, netmask, prefixlen, nh);
+#endif
+}
+
+static inline void multipath_remove(struct rtable *rth)
+{
+#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+ struct ip_mp_alg_ops *ops = ip_mp_alg_table[rth->rt_multipath_alg];
+ /* The remove hook is optional: drivers may leave it NULL. */
+ if (ops && ops->mp_alg_remove && (rth->u.dst.flags & DST_BALANCED))
+ ops->mp_alg_remove(rth);
+#endif
+}
+
+static inline int multipath_comparekeys(const struct flowi *flp1,
const struct flowi *flp2)
{
return flp1->fl4_dst == flp2->fl4_dst &&
@@ -23,4 +94,4 @@
(IPTOS_RT_MASK | RTO_ONLINK));
}
-#endif /* _IP_MP_ALG_H */
+#endif /* _NET_IP_MP_ALG_H */
diff -Nru a/include/net/route.h b/include/net/route.h
--- a/include/net/route.h 2005-03-18 14:08:05 -08:00
+++ b/include/net/route.h 2005-03-18 14:08:05 -08:00
@@ -202,77 +202,4 @@
return rt->peer;
}
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_WRANDOM
-extern void __multipath_flush(void);
-static inline void multipath_flush(void)
-{
- __multipath_flush();
-}
-#else /* CONFIG_IP_ROUTE_MULTIPATH_WRANDOM */
-static inline void multipath_flush(void)
-{
-}
-#endif /* CONFIG_IP_ROUTE_MULTIPATH_WRANDOM */
-
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_WRANDOM
-extern void __multipath_set_nhinfo(__u32 network,
- __u32 netmask,
- unsigned char prefixlen,
- const struct fib_nh* nh);
-static inline void multipath_set_nhinfo(__u32 network,
- __u32 netmask,
- unsigned char prefixlen,
- const struct fib_nh* nh)
-{
- __multipath_set_nhinfo(network, netmask, prefixlen, nh);
-}
-#else
-static inline void multipath_set_nhinfo(__u32 network,
- __u32 netmask,
- unsigned char prefixlen,
- const struct fib_nh* nh)
-{
-}
-#endif
-
-
-
-#if defined(CONFIG_IP_ROUTE_MULTIPATH_RR) ||
defined(CONFIG_IP_ROUTE_MULTIPATH_DRR)
-extern void __multipath_remove(struct rtable *rt);
-static inline void multipath_remove(struct rtable *rt)
-{
- if ( rt->u.dst.flags & DST_BALANCED )
- __multipath_remove(rt);
-}
-#else /* CONFIG_IP_ROUTE_MULTIPATH_RR || CONFIG_IP_ROUTE_MULTIPATH_DRR */
-static inline void multipath_remove(struct rtable *rt)
-{
-}
-#endif /* CONFIG_IP_ROUTE_MULTIPATH_RR || CONFIG_IP_ROUTE_MULTIPATH_DRR */
-
-
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-extern void __multipath_selectroute(const struct flowi *flp,
- struct rtable *rth,
- struct rtable **rp);
-static inline int multipath_selectroute(const struct flowi *flp,
- struct rtable *rth,
- struct rtable **rp)
-{
- if (rth->u.dst.flags & DST_BALANCED) {
- __multipath_selectroute(flp, rth, rp);
- return 1;
- } else {
- return 0;
- }
-}
-#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
-static inline int multipath_selectroute(const struct flowi *flp,
- struct rtable *rth,
- struct rtable **rp)
-{
- return 0;
-}
-#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
-
#endif /* _ROUTE_H */
diff -Nru a/net/ipv4/Kconfig b/net/ipv4/Kconfig
--- a/net/ipv4/Kconfig 2005-03-18 14:08:05 -08:00
+++ b/net/ipv4/Kconfig 2005-03-18 14:08:05 -08:00
@@ -100,43 +100,37 @@
If unsure, say N.
-#
-# multipath policy configuration
-#
-choice
- prompt "Multipath policy"
- depends on IP_ROUTE_MULTIPATH_CACHED
- default IP_ROUTE_MULTIPATH_RANDOM
-
config IP_ROUTE_MULTIPATH_RR
- bool "round robin (EXPERIMENTAL)"
+ tristate "MULTIPATH: round robin algorithm"
+ depends on IP_ROUTE_MULTIPATH_CACHED
help
Mulitpath routes are chosen according to Round Robin
config IP_ROUTE_MULTIPATH_RANDOM
- bool "random multipath (EXPERIMENTAL)"
+ tristate "MULTIPATH: random algorithm"
+ depends on IP_ROUTE_MULTIPATH_CACHED
help
Multipath routes are chosen in a random fashion. Actually,
there is no weight for a route. The advantage of this policy
is that it is implemented stateless and therefore introduces only
a very small delay.
+
config IP_ROUTE_MULTIPATH_WRANDOM
- bool "weighted random multipath (EXPERIMENTAL)"
+ tristate "MULTIPATH: weighted random algorithm"
+ depends on IP_ROUTE_MULTIPATH_CACHED
help
Multipath routes are chosen in a weighted random fashion.
The per route weights are the weights visible via ip route 2. As the
corresponding state management introduces some overhead routing delay
is increased.
+
config IP_ROUTE_MULTIPATH_DRR
- bool "interface round robin (EXPERIMENTAL)"
+ tristate "MULTIPATH: interface round robin algorithm"
+ depends on IP_ROUTE_MULTIPATH_CACHED
help
Connections are distributed in a round robin fashion over the
available interfaces. This policy makes sense if the connections
should be primarily distributed on interfaces and not on routes.
-endchoice
-#
-# END OF multipath policy configuration
-#
config IP_ROUTE_VERBOSE
bool "IP: verbose route monitoring"
diff -Nru a/net/ipv4/Makefile b/net/ipv4/Makefile
--- a/net/ipv4/Makefile 2005-03-18 14:08:05 -08:00
+++ b/net/ipv4/Makefile 2005-03-18 14:08:05 -08:00
@@ -27,6 +27,7 @@
obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_IP_VS) += ipvs/
obj-$(CONFIG_IP_TCPDIAG) += tcp_diag.o
+obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff -Nru a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
--- a/net/ipv4/fib_semantics.c 2005-03-18 14:08:05 -08:00
+++ b/net/ipv4/fib_semantics.c 2005-03-18 14:08:05 -08:00
@@ -42,6 +42,7 @@
#include <net/tcp.h>
#include <net/sock.h>
#include <net/ip_fib.h>
+#include <net/ip_mp_alg.h>
#include "fib_lookup.h"
@@ -649,6 +650,9 @@
#else
const int nhs = 1;
#endif
+#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+ u32 mp_alg = IP_MP_ALG_NONE;
+#endif
/* Fast check to catch the most weird cases */
if (fib_props[r->rtm_type].scope > r->rtm_scope)
@@ -661,6 +665,15 @@
goto err_inval;
}
#endif
+#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+ if (rta->rta_mp_alg) {
+ mp_alg = *rta->rta_mp_alg;
+
+ if (mp_alg < IP_MP_ALG_NONE ||
+ mp_alg > IP_MP_ALG_MAX)
+ goto err_inval;
+ }
+#endif
err = -ENOBUFS;
if (fib_info_cnt >= fib_hash_size) {
@@ -752,6 +765,10 @@
#endif
}
+#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+ fi->fib_mp_alg = mp_alg;
+#endif
+
if (fib_props[r->rtm_type].error) {
if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
goto err_inval;
@@ -896,7 +913,7 @@
res->type = fa->fa_type;
res->scope = fa->fa_scope;
res->fi = fa->fa_info;
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_WRANDOM
+#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
res->netmask = mask;
res->network = zone &
(0xFFFFFFFF >> (32 - prefixlen));
diff -Nru a/net/ipv4/multipath.c b/net/ipv4/multipath.c
--- /dev/null Wed Dec 31 16:00:00 196900
+++ b/net/ipv4/multipath.c 2005-03-18 14:08:05 -08:00
@@ -0,0 +1,54 @@
+/* multipath.c: IPV4 multipath algorithm support.
+ *
+ * Copyright (C) 2004, 2005 Einar Lueck <[EMAIL PROTECTED]>
+ * Copyright (C) 2005 David S. Miller <[EMAIL PROTECTED]>
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/spinlock.h>
+
+#include <net/ip_mp_alg.h>
+
+static DEFINE_SPINLOCK(alg_table_lock); /* guards slot updates below */
+struct ip_mp_alg_ops *ip_mp_alg_table[IP_MP_ALG_MAX + 1]; /* NONE..MAX inclusive */
+
+int multipath_alg_register(struct ip_mp_alg_ops *ops, enum ip_mp_alg n)
+{
+ struct ip_mp_alg_ops **slot;
+ int err;
+
+ if (n < IP_MP_ALG_NONE || n > IP_MP_ALG_MAX)
+ return -EINVAL;
+ /* Install ops under the table lock; an occupied slot yields -EBUSY. */
+ spin_lock(&alg_table_lock);
+ slot = &ip_mp_alg_table[n];
+ if (*slot != NULL) {
+ err = -EBUSY;
+ } else {
+ *slot = ops;
+ err = 0;
+ }
+ spin_unlock(&alg_table_lock);
+
+ return err;
+}
+EXPORT_SYMBOL(multipath_alg_register);
+
+void multipath_alg_unregister(struct ip_mp_alg_ops *ops, enum ip_mp_alg n)
+{
+ struct ip_mp_alg_ops **slot;
+
+ if (n < IP_MP_ALG_NONE || n > IP_MP_ALG_MAX)
+ return;
+ /* Clear the slot only if it still holds this ops pointer. */
+ spin_lock(&alg_table_lock);
+ slot = &ip_mp_alg_table[n];
+ if (*slot == ops)
+ *slot = NULL;
+ spin_unlock(&alg_table_lock);
+
+ synchronize_net();
+}
+EXPORT_SYMBOL(multipath_alg_unregister);
diff -Nru a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c
--- a/net/ipv4/multipath_drr.c 2005-03-18 14:08:05 -08:00
+++ b/net/ipv4/multipath_drr.c 2005-03-18 14:08:05 -08:00
@@ -56,12 +56,9 @@
#define MULTIPATH_MAX_DEVICECANDIDATES 10
static struct multipath_device state[MULTIPATH_MAX_DEVICECANDIDATES];
-static spinlock_t state_lock = SPIN_LOCK_UNLOCKED;
-static int registered_dev_notifier = 0;
+static DEFINE_SPINLOCK(state_lock);
static struct rtable *last_selection = NULL;
-#define RTprint(a...) // printk(KERN_DEBUG a)
-
static int inline __multipath_findslot(void)
{
int i;
@@ -85,8 +82,8 @@
return -1;
}
-static int multipath_dev_event(struct notifier_block *this,
- unsigned long event, void *ptr)
+static int drr_dev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
{
struct net_device *dev = ptr;
int devidx;
@@ -101,12 +98,6 @@
state[devidx].allocated = 0;
state[devidx].ifi = 0;
atomic_set(&state[devidx].usecount, 0);
- RTprint(KERN_DEBUG"%s: successfully removed device " \
- "with index %d\n",__FUNCTION__, devidx);
- } else {
- RTprint(KERN_DEBUG"%s: Device not relevant for " \
- " multipath: %d\n",
- __FUNCTION__, devidx);
}
spin_unlock_bh(&state_lock);
@@ -116,17 +107,17 @@
return NOTIFY_DONE;
}
-struct notifier_block multipath_dev_notifier = {
- .notifier_call = multipath_dev_event,
+struct notifier_block drr_dev_notifier = {
+ .notifier_call = drr_dev_event,
};
-void __multipath_remove(struct rtable *rt)
+static void drr_remove(struct rtable *rt)
{
if (last_selection == rt)
last_selection = NULL;
}
-void __multipath_safe_inc(atomic_t *usecount)
+static void drr_safe_inc(atomic_t *usecount)
{
int n;
@@ -136,9 +127,6 @@
if (n <= 0) {
int i;
- RTprint("%s: detected overflow, now ill will reset all "\
- "usecounts\n", __FUNCTION__);
-
spin_lock_bh(&state_lock);
for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++)
@@ -148,7 +136,7 @@
}
}
-void __multipath_selectroute(const struct flowi *flp,
+static void drr_select_route(const struct flowi *flp,
struct rtable *first, struct rtable **rp)
{
struct rtable *nh, *result, *cur_min;
@@ -156,16 +144,9 @@
int devidx = -1;
int cur_min_devidx = -1;
- /* register a notifier to stay informed about dying devices */
- if (!registered_dev_notifier) {
- registered_dev_notifier = 1;
- register_netdevice_notifier(&multipath_dev_notifier);
- }
-
/* if necessary and possible utilize the old alternative */
if ((flp->flags & FLOWI_FLAG_MULTIPATHOLDROUTE) != 0 &&
last_selection != NULL) {
- RTprint( KERN_CRIT"%s: holding route \n", __FUNCTION__ );
result = last_selection;
*rp = result;
return;
@@ -206,9 +187,6 @@
devidx = __multipath_findslot();
if (devidx == -1) {
/* unlikely but possible */
- RTprint(KERN_DEBUG"%s: " \
- "out of space\n",
- __FUNCTION__);
continue;
}
@@ -216,13 +194,6 @@
state[devidx].ifi = nh_ifidx;
atomic_set(&state[devidx].usecount, 0);
min_usecount = 0;
- RTprint(KERN_DEBUG"%s: created " \
- " for " \
- "device %d and " \
- "min_usecount " \
- " == -1\n",
- __FUNCTION__,
- nh_ifidx);
}
spin_unlock_bh(&state_lock);
@@ -232,11 +203,7 @@
/* if the device has not been used it is
* the primary target
*/
- RTprint(KERN_DEBUG"%s: now setting " \
- "result to device %d\n",
- __FUNCTION__, nh_ifidx );
-
- __multipath_safe_inc(&state[devidx].usecount);
+ drr_safe_inc(&state[devidx].usecount);
result = nh;
} else {
int count =
@@ -247,13 +214,6 @@
cur_min = nh;
cur_min_devidx = devidx;
min_usecount = count;
-
- RTprint(KERN_DEBUG"%s: found " \
- "device " \
- "%d with usecount == %d\n",
- __FUNCTION__,
- nh_ifidx,
- min_usecount);
}
}
}
@@ -261,24 +221,45 @@
if (!result) {
if (cur_min) {
- RTprint( KERN_DEBUG"%s: index of device in state "\
- "array: %d\n",
- __FUNCTION__, cur_min_devidx );
- __multipath_safe_inc(&state[cur_min_devidx].usecount);
+ drr_safe_inc(&state[cur_min_devidx].usecount);
result = cur_min;
} else {
- RTprint( KERN_DEBUG"%s: utilized first\n",
- __FUNCTION__);
result = first;
}
- } else {
- RTprint(KERN_DEBUG"%s: utilize result: found device " \
- "%d with usecount == %d\n",
- __FUNCTION__, result->u.dst.dev->ifindex,
- min_usecount);
-
}
*rp = result;
last_selection = result;
}
+
+static struct ip_mp_alg_ops drr_ops = {
+ .mp_alg_select_route = drr_select_route,
+ .mp_alg_remove = drr_remove,
+};
+
+static int __init drr_init(void)
+{
+ int err = register_netdevice_notifier(&drr_dev_notifier);
+
+ if (err)
+ return err;
+ /* Claim the DRR slot (must match drr_exit); rr_init() owns IP_MP_ALG_RR. */
+ err = multipath_alg_register(&drr_ops, IP_MP_ALG_DRR);
+ if (err)
+ goto fail;
+
+ return 0;
+
+fail:
+ unregister_netdevice_notifier(&drr_dev_notifier);
+ return err;
+}
+
+static void __exit drr_exit(void)
+{
+ unregister_netdevice_notifier(&drr_dev_notifier);
+ multipath_alg_unregister(&drr_ops, IP_MP_ALG_DRR);
+}
+
+module_init(drr_init);
+module_exit(drr_exit);
diff -Nru a/net/ipv4/multipath_random.c b/net/ipv4/multipath_random.c
--- a/net/ipv4/multipath_random.c 2005-03-18 14:08:05 -08:00
+++ b/net/ipv4/multipath_random.c 2005-03-18 14:08:05 -08:00
@@ -47,17 +47,26 @@
#include <net/checksum.h>
#include <net/ip_mp_alg.h>
-#define RTprint(a...) // printk(KERN_DEBUG a)
-
#define MULTIPATH_MAX_CANDIDATES 40
/* interface to random number generation */
static unsigned int RANDOM_SEED = 93186752;
-static inline unsigned int random(unsigned int ubound);
-void __multipath_selectroute(const struct flowi *flp,
- struct rtable *first,
- struct rtable **rp)
+static inline unsigned int random(unsigned int ubound)
+{
+ static unsigned int a = 1588635695,
+ q = 2,
+ r = 1117695901;
+
+ RANDOM_SEED = a*(RANDOM_SEED % q) - r*(RANDOM_SEED / q);
+
+ return RANDOM_SEED % ubound;
+}
+
+
+static void random_select_route(const struct flowi *flp,
+ struct rtable *first,
+ struct rtable **rp)
{
struct rtable *rt;
struct rtable *decision;
@@ -78,9 +87,6 @@
unsigned char candidate_no = (unsigned char)
random(candidate_count);
- RTprint( "%s: randomly chosen candidate: %d (count: %d)\n",
- __FUNCTION__, candidate_no, candidate_count );
-
/* find chosen candidate and adjust GC data for all candidates
* to ensure they stay in cache
*/
@@ -104,13 +110,19 @@
*rp = decision;
}
-static inline unsigned int random(unsigned int ubound)
-{
- static unsigned int a = 1588635695,
- q = 2,
- r = 1117695901;
+static struct ip_mp_alg_ops random_ops = {
+ .mp_alg_select_route = random_select_route,
+};
- RANDOM_SEED = a*(RANDOM_SEED % q) - r*(RANDOM_SEED / q);
+static int __init random_init(void)
+{
+ return multipath_alg_register(&random_ops, IP_MP_ALG_RANDOM);
+}
- return RANDOM_SEED % ubound;
+static void __exit random_exit(void)
+{
+ multipath_alg_unregister(&random_ops, IP_MP_ALG_RANDOM);
}
+
+module_init(random_init);
+module_exit(random_exit);
diff -Nru a/net/ipv4/multipath_rr.c b/net/ipv4/multipath_rr.c
--- a/net/ipv4/multipath_rr.c 2005-03-18 14:08:05 -08:00
+++ b/net/ipv4/multipath_rr.c 2005-03-18 14:08:05 -08:00
@@ -47,29 +47,25 @@
#include <net/checksum.h>
#include <net/ip_mp_alg.h>
-#define RTprint(a...) // printk(KERN_DEBUG a)
-
#define MULTIPATH_MAX_CANDIDATES 40
static struct rtable* last_used = NULL;
-void __multipath_remove(struct rtable *rt)
+static void rr_remove(struct rtable *rt)
{
if (last_used == rt)
last_used = NULL;
}
-void __multipath_selectroute(const struct flowi *flp,
- struct rtable *first, struct rtable **rp)
+static void rr_select_route(const struct flowi *flp,
+ struct rtable *first, struct rtable **rp)
{
struct rtable *nh, *result, *min_use_cand = NULL;
int min_use = -1;
/* if necessary and possible utilize the old alternative */
- if ( ( flp->flags & FLOWI_FLAG_MULTIPATHOLDROUTE ) != 0 &&
- last_used != NULL ) {
- RTprint( KERN_CRIT"%s: holding route \n",
- __FUNCTION__ );
+ if ((flp->flags & FLOWI_FLAG_MULTIPATHOLDROUTE) != 0 &&
+ last_used != NULL) {
result = last_used;
goto out;
}
@@ -88,8 +84,6 @@
min_use = nh->u.dst.__use;
min_use_cand = nh;
}
- RTprint( KERN_CRIT"%s: found balanced entry\n",
- __FUNCTION__ );
}
}
result = min_use_cand;
@@ -101,3 +95,21 @@
result->u.dst.__use++;
*rp = result;
}
+
+static struct ip_mp_alg_ops rr_ops = {
+ .mp_alg_select_route = rr_select_route,
+ .mp_alg_remove = rr_remove,
+};
+
+static int __init rr_init(void)
+{
+ return multipath_alg_register(&rr_ops, IP_MP_ALG_RR);
+}
+
+static void __exit rr_exit(void)
+{
+ multipath_alg_unregister(&rr_ops, IP_MP_ALG_RR);
+}
+
+module_init(rr_init);
+module_exit(rr_exit);
diff -Nru a/net/ipv4/multipath_wrandom.c b/net/ipv4/multipath_wrandom.c
--- a/net/ipv4/multipath_wrandom.c 2005-03-18 14:08:05 -08:00
+++ b/net/ipv4/multipath_wrandom.c 2005-03-18 14:08:05 -08:00
@@ -48,8 +48,6 @@
#include <net/ip_fib.h>
#include <net/ip_mp_alg.h>
-#define MPprint(a...) // printk(KERN_DEBUG a)
-
#define MULTIPATH_STATE_SIZE 15
struct multipath_candidate {
@@ -85,13 +83,19 @@
};
/* state: primarily weight per route information */
-static int multipath_state_initialized = 0;
-static spinlock_t state_big_lock = SPIN_LOCK_UNLOCKED;
static struct multipath_bucket state[MULTIPATH_STATE_SIZE];
/* interface to random number generation */
static unsigned int RANDOM_SEED = 93186752;
-static __inline__ unsigned int random(unsigned int ubound);
+
+static inline unsigned int random(unsigned int ubound)
+{
+ static unsigned int a = 1588635695,
+ q = 2,
+ r = 1117695901;
+ RANDOM_SEED = a*(RANDOM_SEED % q) - r*(RANDOM_SEED / q);
+ return RANDOM_SEED % ubound;
+}
static unsigned char __multipath_lookup_weight(const struct flowi *fl,
const struct rtable *rt)
@@ -129,8 +133,6 @@
if ((targetnetwork & d->netmask) == d->network) {
weight = d->nh_info->nh_weight;
- MPprint("%s: found weight %d for gateway %u\n",
- __FUNCTION__, weight, rt->rt_gateway);
goto out;
}
}
@@ -140,36 +142,19 @@
return weight;
}
-static void __multipath_init_state(void)
+static void wrandom_init_state(void)
{
- spin_lock(&state_big_lock);
+ int i;
- /* check again due to SMP and to prevent contention */
- if (!multipath_state_initialized) {
- int i;
-
- for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) {
- INIT_LIST_HEAD(&state[i].head);
- state[i].lock = SPIN_LOCK_UNLOCKED;
- }
+ for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) {
+ INIT_LIST_HEAD(&state[i].head);
+ spin_lock_init(&state[i].lock);
}
-
- /* now mark initialized */
- multipath_state_initialized = 1;
-
- spin_unlock(&state_big_lock);
}
-static void inline __multipath_init(void)
-{
- /* do not spinlock to reduce unnecessary contention */
- if (!multipath_state_initialized)
- __multipath_init_state();
-}
-
-void __multipath_selectroute(const struct flowi *flp,
- struct rtable *first,
- struct rtable **rp)
+static void wrandom_select_route(const struct flowi *flp,
+ struct rtable *first,
+ struct rtable **rp)
{
struct rtable *rt;
struct rtable *decision;
@@ -180,9 +165,6 @@
int selector;
const size_t size_mpc = sizeof(struct multipath_candidate);
- /* init state if necessary */
- __multipath_init();
-
/* collect all candidates and identify their weights */
for (rt = rcu_dereference(first); rt;
rt = rcu_dereference(rt->u.rt_next)) {
@@ -192,6 +174,9 @@
(struct multipath_candidate*)
kmalloc(size_mpc, GFP_KERNEL);
+ if (!mpc)
+ return;
+
power += __multipath_lookup_weight(flp, rt) * 10000;
mpc->power = power;
@@ -210,8 +195,6 @@
/* choose a weighted random candidate */
decision = first;
selector = random(power);
- MPprint("%s: random number %d in range %d\n", __FUNCTION__, selector,
- power);
last_power = 0;
/* select candidate, adjust GC data and cleanup local state */
@@ -219,13 +202,9 @@
last_mpc = NULL;
for (mpc = first_mpc; mpc; mpc = mpc->next) {
mpc->rt->u.dst.lastuse = jiffies;
- MPprint("%s: last_power = %d, selector: %d, mpc->power: %d\n",
- __FUNCTION__, last_power, selector, mpc->power);
- if (last_power <= selector && selector < mpc->power) {
+ if (last_power <= selector && selector < mpc->power)
decision = mpc->rt;
- MPprint("%s: selected %u\n", __FUNCTION__,
- decision->rt_gateway);
- }
+
last_power = mpc->power;
if (last_mpc)
kfree(last_mpc);
@@ -242,18 +221,15 @@
*rp = decision;
}
-void __multipath_set_nhinfo(__u32 network,
- __u32 netmask,
- unsigned char prefixlen,
- const struct fib_nh* nh)
+static void wrandom_set_nhinfo(__u32 network,
+ __u32 netmask,
+ unsigned char prefixlen,
+ const struct fib_nh *nh)
{
const int state_idx = nh->nh_oif % MULTIPATH_STATE_SIZE;
struct multipath_route *r, *target_route = NULL;
struct multipath_dest *d, *target_dest = NULL;
- /* init state if necessary */
- __multipath_init();
-
/* store the weight information for a certain route */
spin_lock(&state[state_idx].lock);
@@ -321,20 +297,15 @@
kfree(dst);
}
-void __multipath_flush(void)
+static void wrandom_flush(void)
{
int i;
- MPprint("%s: called\n", __FUNCTION__);
-
- /* init state if necessary */
- __multipath_init();
-
/* defere delete to all entries */
for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) {
struct multipath_route *r;
- spin_lock(&state[i].lock);
+ spin_lock(&state[i].lock);
list_for_each_entry_rcu(r, &state[i].head, list) {
struct multipath_dest *d;
list_for_each_entry_rcu(d, &r->dests, list) {
@@ -349,15 +320,25 @@
spin_unlock(&state[i].lock);
}
+}
+
+static struct ip_mp_alg_ops wrandom_ops = {
+ .mp_alg_select_route = wrandom_select_route,
+ .mp_alg_flush = wrandom_flush,
+ .mp_alg_set_nhinfo = wrandom_set_nhinfo,
+};
+
+static int __init wrandom_init(void)
+{
+ wrandom_init_state();
- MPprint("%s: finished\n", __FUNCTION__);
+ return multipath_alg_register(&wrandom_ops, IP_MP_ALG_WRANDOM);
}
-static __inline__ unsigned int random(unsigned int ubound)
+static void __exit wrandom_exit(void)
{
- static unsigned int a = 1588635695,
- q = 2,
- r = 1117695901;
- RANDOM_SEED = a*(RANDOM_SEED % q) - r*(RANDOM_SEED / q);
- return RANDOM_SEED % ubound;
+ multipath_alg_unregister(&wrandom_ops, IP_MP_ALG_WRANDOM);
}
+
+module_init(wrandom_init);
+module_exit(wrandom_exit);
diff -Nru a/net/ipv4/route.c b/net/ipv4/route.c
--- a/net/ipv4/route.c 2005-03-18 14:08:05 -08:00
+++ b/net/ipv4/route.c 2005-03-18 14:08:05 -08:00
@@ -100,6 +100,7 @@
#include <net/tcp.h>
#include <net/icmp.h>
#include <net/xfrm.h>
+#include <net/ip_mp_alg.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
@@ -129,7 +130,7 @@
static int ip_rt_secret_interval = 10 * 60 * HZ;
static unsigned long rt_deadline;
-#define RTprint(a...) // printk(KERN_DEBUG a)
+#define RTprint(a...) printk(KERN_DEBUG a)
static struct timer_list rt_flush_timer;
static struct timer_list rt_periodic_timer;
@@ -451,13 +452,13 @@
static __inline__ void rt_free(struct rtable *rt)
{
- multipath_remove( rt );
+ multipath_remove(rt);
call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
}
static __inline__ void rt_drop(struct rtable *rt)
{
- multipath_remove( rt );
+ multipath_remove(rt);
ip_rt_put(rt);
call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
}
@@ -522,37 +523,36 @@
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
static struct rtable **rt_remove_balanced_route(struct rtable **chain_head,
struct rtable *expentry,
- int* removed_count)
+ int *removed_count)
{
int passedexpired = 0;
struct rtable **nextstep = NULL;
struct rtable **rthp = chain_head;
struct rtable *rth;
+
if (removed_count)
*removed_count = 0;
+
while ((rth = *rthp) != NULL) {
- if ( rth == expentry ) {
+ if (rth == expentry)
passedexpired = 1;
- }
if (((*rthp)->u.dst.flags & DST_BALANCED) != 0 &&
compare_keys(&(*rthp)->fl, &expentry->fl)) {
if (*rthp == expentry) {
*rthp = rth->u.rt_next;
continue;
- }
- else {
+ } else {
*rthp = rth->u.rt_next;
rt_free(rth);
if (removed_count)
++(*removed_count);
}
- }
- else {
- if ( !((*rthp)->u.dst.flags & DST_BALANCED) &&
- passedexpired && !nextstep ) {
+ } else {
+ if (!((*rthp)->u.dst.flags & DST_BALANCED) &&
+ passedexpired && !nextstep)
nextstep = &rth->u.rt_next;
- }
+
rthp = &rth->u.rt_next;
}
}
@@ -560,11 +560,10 @@
rt_free(expentry);
if (removed_count)
++(*removed_count);
-
+
return nextstep;
}
-
-#endif
+#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
/* This runs via a timer and thus is always in BH context. */
@@ -600,15 +599,13 @@
/* Cleanup aged off entries. */
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
/* remove all related balanced entries if necessary */
- if ( rth->u.dst.flags & DST_BALANCED ) {
+ if (rth->u.dst.flags & DST_BALANCED) {
rthp = rt_remove_balanced_route(
&rt_hash_table[i].chain,
rth, NULL);
- if (!rthp) {
+ if (!rthp)
break;
- }
- }
- else {
+ } else {
*rthp = rth->u.rt_next;
rt_free(rth);
}
@@ -785,19 +782,20 @@
continue;
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
- /* remove all related balanced entries if
necessary */
- if ( rth->u.dst.flags & DST_BALANCED ) {
+ /* remove all related balanced entries
+ * if necessary
+ */
+ if (rth->u.dst.flags & DST_BALANCED) {
int r;
+
rthp = rt_remove_balanced_route(
&rt_hash_table[i].chain,
rth,
&r);
goal -= r;
- if (!rthp) {
+ if (!rthp)
break;
- }
- }
- else {
+ } else {
*rthp = rth->u.rt_next;
rt_free(rth);
goal--;
@@ -1724,7 +1722,7 @@
rth->u.dst.flags= DST_HOST;
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
- if ( res->fi->fib_nhs > 1 )
+ if (res->fi->fib_nhs > 1)
rth->u.dst.flags |= DST_BALANCED;
#endif
if (in_dev->cnf.no_policy)
@@ -1795,65 +1793,57 @@
struct in_device *in_dev,
u32 daddr, u32 saddr, u32 tos)
{
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
struct rtable* rth;
unsigned char hop, hopcount, lasthop;
int err = -EINVAL;
- unsigned hash;
- if (res->fi) {
+ unsigned int hash;
+
+ if (res->fi)
hopcount = res->fi->fib_nhs;
- }
- else {
+ else
hopcount = 1;
- }
+
lasthop = hopcount - 1;
/* distinguish between multipath and singlepath */
- if ( hopcount < 2 )
- return ip_mkroute_input_def(skb, res, fl, in_dev, daddr,
+ if (hopcount < 2)
+ return ip_mkroute_input_def(skb, res, fl, in_dev, daddr,
saddr, tos);
- RTprint( KERN_DEBUG"%s: entered (hopcount: %d)\n", __FUNCTION__,
- hopcount);
-
/* add all alternatives to the routing cache */
- for ( hop = 0; hop < hopcount; ++hop ) {
+ for (hop = 0; hop < hopcount; hop++) {
res->nh_sel = hop;
- RTprint( KERN_DEBUG"%s: entered (hopcount: %d)\n",
- __FUNCTION__, hopcount);
-
/* create a routing cache entry */
- err = __mkroute_input( skb, res, in_dev, daddr, saddr, tos,
- &rth );
- if ( err )
+ err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos,
+ &rth);
+ if (err)
return err;
-
/* put it into the cache */
hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5), tos);
err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst);
- if ( err )
+ if (err)
return err;
-
+
/* forward hop information to multipath impl. */
- multipath_set_nhinfo(FIB_RES_NETWORK(*res),
+ multipath_set_nhinfo(rth,
+ FIB_RES_NETWORK(*res),
FIB_RES_NETMASK(*res),
res->prefixlen,
&FIB_RES_NH(*res));
-
- /* only for the last hop the reference count is handled
- outside */
- RTprint( KERN_DEBUG"%s: balanced entry created: %d\n",
- __FUNCTION__, rth );
- if ( hop == lasthop )
+ /* only for the last hop the reference count is handled
+ * outside
+ */
+ if (hop == lasthop)
atomic_set(&(skb->dst->__refcnt), 1);
}
return err;
-#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
+#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, saddr, tos);
-#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
+#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
}
@@ -2175,8 +2165,11 @@
rth->u.dst.flags= DST_HOST;
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
- if (res->fi && res->fi->fib_nhs > 1)
- rth->u.dst.flags |= DST_BALANCED;
+ if (res->fi) {
+ rth->rt_multipath_alg = res->fi->fib_mp_alg;
+ if (res->fi->fib_nhs > 1)
+ rth->u.dst.flags |= DST_BALANCED;
+ }
#endif
if (in_dev->cnf.no_xfrm)
rth->u.dst.flags |= DST_NOXFRM;
@@ -2274,17 +2267,13 @@
unsigned char hop;
unsigned hash;
int err = -EINVAL;
- struct rtable* rth;
+ struct rtable *rth;
- if (res->fi && res->fi->fib_nhs > 1) {
+ if (res->fi && res->fi->fib_nhs > 1) {
unsigned char hopcount = res->fi->fib_nhs;
- RTprint( KERN_DEBUG"%s: entered (hopcount: %d, fl->oif: %d)\n",
- __FUNCTION__, hopcount, fl->oif);
- for ( hop = 0; hop < hopcount; ++hop ) {
+ for (hop = 0; hop < hopcount; hop++) {
struct net_device *dev2nexthop;
- RTprint( KERN_DEBUG"%s: hop %d of %d\n", __FUNCTION__,
- hop, hopcount );
res->nh_sel = hop;
@@ -2292,49 +2281,34 @@
dev2nexthop = FIB_RES_DEV(*res);
dev_hold(dev2nexthop);
- err = __mkroute_output(&rth, res, fl, oldflp,
+ err = __mkroute_output(&rth, res, fl, oldflp,
dev2nexthop, flags);
- /** FIXME remove debug code */
- RTprint( "%s: balanced entry created: %d " \
- " (GW: %u)\n",
- __FUNCTION__,
- &rth->u.dst,
- FIB_RES_GW(*res) );
-
- if ( err != 0 ) {
+ if (err != 0)
goto cleanup;
- }
- RTprint( KERN_DEBUG"%s: created successfully %d\n",
- __FUNCTION__, hop );
-
hash = rt_hash_code(oldflp->fl4_dst,
- oldflp->fl4_src ^
+ oldflp->fl4_src ^
(oldflp->oif << 5), tos);
err = rt_intern_hash(hash, rth, rp);
- RTprint( KERN_DEBUG"%s: hashed %d\n",
- __FUNCTION__, hop );
/* forward hop information to multipath impl. */
- multipath_set_nhinfo(FIB_RES_NETWORK(*res),
+ multipath_set_nhinfo(rth,
+ FIB_RES_NETWORK(*res),
FIB_RES_NETMASK(*res),
res->prefixlen,
&FIB_RES_NH(*res));
cleanup:
/* release work reference to output device */
dev_put(dev2nexthop);
-
- if ( err != 0 ) {
+
+ if (err != 0)
return err;
- }
}
- RTprint( "%s: exited loop\n", __FUNCTION__ );
atomic_set(&(*rp)->u.dst.__refcnt, 1);
return err;
- }
- else {
- return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out,
+ } else {
+ return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out,
flags);
}
#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
@@ -2557,9 +2531,11 @@
#endif
!((rth->fl.fl4_tos ^ flp->fl4_tos) &
(IPTOS_RT_MASK | RTO_ONLINK))) {
+
/* check for multipath routes and choose one if
- necessary */
- if (multipath_selectroute(flp, rth, rp)) {
+ * necessary
+ */
+ if (multipath_select_route(flp, rth, rp)) {
dst_hold(&(*rp)->u.dst);
RT_CACHE_STAT_INC(out_hit);
rcu_read_unlock_bh();
@@ -2639,6 +2615,13 @@
#ifdef CONFIG_NET_CLS_ROUTE
if (rt->u.dst.tclassid)
RTA_PUT(skb, RTA_FLOW, 4, &rt->u.dst.tclassid);
+#endif
+#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+ if (rt->rt_multipath_alg != IP_MP_ALG_NONE) {
+ __u32 alg = rt->rt_multipath_alg;
+
+ RTA_PUT(skb, RTA_MP_ALGO, 4, &alg);
+ }
#endif
if (rt->fl.iif)
RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_spec_dst);
-
To unsubscribe from this list: send the line "unsubscribe bk-commits-head" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html