Re: [PATCH 6/17 net-2.6.26] [NETNS]: Default arp parameters lookup.
On Tue, 2008-02-19 at 10:14 +0100, Daniel Lezcano wrote: Denis V. Lunev wrote: Default ARP parameters should be findable regardless of the context. Required to make inetdev_event working. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/core/neighbour.c |4 +--- 1 files changed, 1 insertions(+), 3 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index c895ad4..45ed620 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1275,9 +1275,7 @@ static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, struct neigh_parms *p; for (p = tbl-parms; p; p = p-next) { - if (p-net != net) - continue; - if ((p-dev p-dev-ifindex == ifindex) || + if ((p-dev p-dev-ifindex == ifindex p-net == net) || (!p-dev !ifindex)) return p; } If the values are: p-dev == NULL ifindex == 0 p-net != net The parms should not be taken into account and the looping must continue. But with this modification it is not the case, if we specify parms ifindex == 0, the first parms with the dev field set to NULL will be taken belonging or not to the right net. They should be taken. In the other case inetdev_event will fail for sure in the middle. You could check. These are ARP defaults and I do not see a problem for now to get them. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 6/17 net-2.6.26] [NETNS]: Default arp parameters lookup.
On Tue, 2008-02-19 at 10:51 +0100, Daniel Lezcano wrote: Denis V. Lunev wrote: On Tue, 2008-02-19 at 10:14 +0100, Daniel Lezcano wrote: Denis V. Lunev wrote: Default ARP parameters should be findable regardless of the context. Required to make inetdev_event working. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/core/neighbour.c |4 +--- 1 files changed, 1 insertions(+), 3 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index c895ad4..45ed620 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1275,9 +1275,7 @@ static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, struct neigh_parms *p; for (p = tbl-parms; p; p = p-next) { - if (p-net != net) - continue; - if ((p-dev p-dev-ifindex == ifindex) || + if ((p-dev p-dev-ifindex == ifindex p-net == net) || (!p-dev !ifindex)) return p; } If the values are: p-dev == NULL ifindex == 0 p-net != net The parms should not be taken into account and the looping must continue. But with this modification it is not the case, if we specify parms ifindex == 0, the first parms with the dev field set to NULL will be taken belonging or not to the right net. They should be taken. In the other case inetdev_event will fail for sure in the middle. You could check. These are ARP defaults and I do not see a problem for now to get them. Because there is a parms default per namespace. So several instances of them per nd table. That was the initial approach with Eric's patchset. These changes are not in mainstream and I do not want to touch ARP as this is not a simple thing. In reality ARP will be needed only when we'll have a real device inside a namespace. Right now I prefer to have minimal set of working changes to finish IP and upper layers. Regards, Den -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] [NETNS]: Namespace leak in pneigh_lookup.
A release_net call is missing on the error path in pneigh_lookup. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/core/neighbour.c |1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 7bb6a9a..174e29e 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -507,6 +507,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, if (tbl-pconstructor tbl-pconstructor(n)) { if (dev) dev_put(dev); + release_net(net); kfree(n); n = NULL; goto out; -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 10/17 net-2.6.26] [NETNS]: Process ip_rt_redirect in the correct namespace.
Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/route.c |7 +-- 1 files changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 525787b..44708ab 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1132,10 +1132,12 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, __be32 skeys[2] = { saddr, 0 }; int ikeys[2] = { dev-ifindex, 0 }; struct netevent_redirect netevent; + struct net *net; if (!in_dev) return; + net = dev-nd_net; if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) || ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) || ipv4_is_zeronet(new_gw)) @@ -1147,7 +1149,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, if (IN_DEV_SEC_REDIRECTS(in_dev) ip_fib_check_default(new_gw, dev)) goto reject_redirect; } else { - if (inet_addr_type(init_net, new_gw) != RTN_UNICAST) + if (inet_addr_type(net, new_gw) != RTN_UNICAST) goto reject_redirect; } @@ -1165,7 +1167,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rth-fl.fl4_src != skeys[i] || rth-fl.oif != ikeys[k] || rth-fl.iif != 0 || - rth-rt_genid != atomic_read(rt_genid)) { + rth-rt_genid != atomic_read(rt_genid) || + rth-u.dst.dev-nd_net != net) { rthp = rth-u.dst.rt_next; continue; } -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 12/17 net-2.6.26] [NETNS]: Process /proc/net/rt_cache inside a namespace.
Show routing cache for a particular namespace only. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/route.c | 10 +++--- 1 files changed, 7 insertions(+), 3 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 67df872..c11e6bf 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -273,6 +273,7 @@ static unsigned int rt_hash_code(u32 daddr, u32 saddr) #ifdef CONFIG_PROC_FS struct rt_cache_iter_state { + struct seq_net_private p; int bucket; int genid; }; @@ -285,7 +286,8 @@ static struct rtable *rt_cache_get_first(struct rt_cache_iter_state *st) rcu_read_lock_bh(); r = rcu_dereference(rt_hash_table[st-bucket].chain); while (r) { - if (r-rt_genid == st-genid) + if (r-u.dst.dev-nd_net == st-p.net + r-rt_genid == st-genid) return r; r = rcu_dereference(r-u.dst.rt_next); } @@ -312,6 +314,8 @@ static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct rtable *r) { while ((r = __rt_cache_get_next(st, r)) != NULL) { + if (r-u.dst.dev-nd_net != st-p.net) + continue; if (r-rt_genid == st-genid) break; } @@ -398,7 +402,7 @@ static const struct seq_operations rt_cache_seq_ops = { static int rt_cache_seq_open(struct inode *inode, struct file *file) { - return seq_open_private(file, rt_cache_seq_ops, + return seq_open_net(inode, file, rt_cache_seq_ops, sizeof(struct rt_cache_iter_state)); } @@ -407,7 +411,7 @@ static const struct file_operations rt_cache_seq_fops = { .open= rt_cache_seq_open, .read= seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 13/17 net-2.6.26] [NETNS]: Register /proc/net/rt_cache for each namespace.
Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/route.c | 24 +--- 1 files changed, 21 insertions(+), 3 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c11e6bf..5f67eba 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -545,7 +545,7 @@ static int ip_rt_acct_read(char *buffer, char **start, off_t offset, } #endif -static __init int ip_rt_proc_init(struct net *net) +static int __net_init ip_rt_do_proc_init(struct net *net) { struct proc_dir_entry *pde; @@ -577,8 +577,26 @@ err2: err1: return -ENOMEM; } + +static void __net_exit ip_rt_do_proc_exit(struct net *net) +{ + remove_proc_entry(rt_cache, net-proc_net_stat); + remove_proc_entry(rt_cache, net-proc_net); + remove_proc_entry(rt_acct, net-proc_net); +} + +static struct pernet_operations ip_rt_proc_ops __net_initdata = { + .init = ip_rt_do_proc_init, + .exit = ip_rt_do_proc_exit, +}; + +static int __init ip_rt_proc_init(void) +{ + return register_pernet_subsys(ip_rt_proc_ops); +} + #else -static inline int ip_rt_proc_init(struct net *net) +static inline int ip_rt_proc_init(void) { return 0; } @@ -3056,7 +3074,7 @@ int __init ip_rt_init(void) ip_rt_secret_interval; add_timer(rt_secret_timer); - if (ip_rt_proc_init(init_net)) + if (ip_rt_proc_init()) printk(KERN_ERR Unable to create route proc files\n); #ifdef CONFIG_XFRM xfrm_init(); -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 6/17 net-2.6.26] [NETNS]: Default arp parameters lookup.
Default ARP parameters should be findable regardless of the context. Required to make inetdev_event working. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/core/neighbour.c |4 +--- 1 files changed, 1 insertions(+), 3 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index c895ad4..45ed620 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1275,9 +1275,7 @@ static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, struct neigh_parms *p; for (p = tbl-parms; p; p = p-next) { - if (p-net != net) - continue; - if ((p-dev p-dev-ifindex == ifindex) || + if ((p-dev p-dev-ifindex == ifindex p-net == net) || (!p-dev !ifindex)) return p; } -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 9/17 net-2.6.26] [NETNS]: DST cleanup routines should be called inside namespace.
A device inside a namespace can be brought up and taken down, so the active routing cache should be cleaned up when the device is stopped. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/core/dst.c |3 --- 1 files changed, 0 insertions(+), 3 deletions(-) diff --git a/net/core/dst.c b/net/core/dst.c index 7deef48..3a01a81 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -295,9 +295,6 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, void struct net_device *dev = ptr; struct dst_entry *dst, *last = NULL; - if (dev-nd_net != init_net) - return NOTIFY_DONE; - switch (event) { case NETDEV_UNREGISTER: case NETDEV_DOWN: -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 11/17 net-2.6.26] [IPV4]: rt_cache_get_next should take rt_genid into account.
In the other case /proc/net/rt_cache will look inconsistent in respect to genid. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] Acked-by: Alexey Kuznetsov [EMAIL PROTECTED] --- net/ipv4/route.c | 18 +- 1 files changed, 13 insertions(+), 5 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 44708ab..67df872 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -294,7 +294,8 @@ static struct rtable *rt_cache_get_first(struct rt_cache_iter_state *st) return r; } -static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct rtable *r) +static struct rtable *__rt_cache_get_next(struct rt_cache_iter_state *st, + struct rtable *r) { r = r-u.dst.rt_next; while (!r) { @@ -307,16 +308,23 @@ static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct r return rcu_dereference(r); } +static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, + struct rtable *r) +{ + while ((r = __rt_cache_get_next(st, r)) != NULL) { + if (r-rt_genid == st-genid) + break; + } + return r; +} + static struct rtable *rt_cache_get_idx(struct rt_cache_iter_state *st, loff_t pos) { struct rtable *r = rt_cache_get_first(st); if (r) - while (pos (r = rt_cache_get_next(st, r))) { - if (r-rt_genid != st-genid) - continue; + while (pos (r = rt_cache_get_next(st, r))) --pos; - } return pos ? NULL : r; } -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 7/17 net-2.6.26] [NETNS]: Disable multicasting configuration inside non-initial namespace.
Do not calls hooks from device notifiers and disallow configuration from ioctl/netlink layer. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/igmp.c | 39 +++ 1 files changed, 39 insertions(+), 0 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 732cd07..d3f34a7 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1198,6 +1198,9 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) ASSERT_RTNL(); + if (in_dev-dev-nd_net != init_net) + return; + for (im=in_dev-mc_list; im; im=im-next) { if (im-multiaddr == addr) { im-users++; @@ -1277,6 +1280,9 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) ASSERT_RTNL(); + if (in_dev-dev-nd_net != init_net) + return; + for (ip=in_dev-mc_list; (i=*ip)!=NULL; ip=i-next) { if (i-multiaddr==addr) { if (--i-users == 0) { @@ -1304,6 +1310,9 @@ void ip_mc_down(struct in_device *in_dev) ASSERT_RTNL(); + if (in_dev-dev-nd_net != init_net) + return; + for (i=in_dev-mc_list; i; i=i-next) igmp_group_dropped(i); @@ -1324,6 +1333,9 @@ void ip_mc_init_dev(struct in_device *in_dev) { ASSERT_RTNL(); + if (in_dev-dev-nd_net != init_net) + return; + in_dev-mc_tomb = NULL; #ifdef CONFIG_IP_MULTICAST in_dev-mr_gq_running = 0; @@ -1347,6 +1359,9 @@ void ip_mc_up(struct in_device *in_dev) ASSERT_RTNL(); + if (in_dev-dev-nd_net != init_net) + return; + ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); for (i=in_dev-mc_list; i; i=i-next) @@ -1363,6 +1378,9 @@ void ip_mc_destroy_dev(struct in_device *in_dev) ASSERT_RTNL(); + if (in_dev-dev-nd_net != init_net) + return; + /* Deactivate timers */ ip_mc_down(in_dev); @@ -1744,6 +1762,9 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) if (!ipv4_is_multicast(addr)) return -EINVAL; + if (sk-sk_net != init_net) + return -EPROTONOSUPPORT; + rtnl_lock(); in_dev = ip_mc_find_dev(imr); @@ -1812,6 +1833,9 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) u32 ifindex; int ret = -EADDRNOTAVAIL; + if (sk-sk_net != init_net) + return 
-EPROTONOSUPPORT; + rtnl_lock(); in_dev = ip_mc_find_dev(imr); ifindex = imr-imr_ifindex; @@ -1857,6 +1881,9 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct if (!ipv4_is_multicast(addr)) return -EINVAL; + if (sk-sk_net != init_net) + return -EPROTONOSUPPORT; + rtnl_lock(); imr.imr_multiaddr.s_addr = mreqs-imr_multiaddr; @@ -1990,6 +2017,9 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) msf-imsf_fmode != MCAST_EXCLUDE) return -EINVAL; + if (sk-sk_net != init_net) + return -EPROTONOSUPPORT; + rtnl_lock(); imr.imr_multiaddr.s_addr = msf-imsf_multiaddr; @@ -2070,6 +2100,9 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, if (!ipv4_is_multicast(addr)) return -EINVAL; + if (sk-sk_net != init_net) + return -EPROTONOSUPPORT; + rtnl_lock(); imr.imr_multiaddr.s_addr = msf-imsf_multiaddr; @@ -2132,6 +2165,9 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, if (!ipv4_is_multicast(addr)) return -EINVAL; + if (sk-sk_net != init_net) + return -EPROTONOSUPPORT; + rtnl_lock(); err = -EADDRNOTAVAIL; @@ -2216,6 +2252,9 @@ void ip_mc_drop_socket(struct sock *sk) if (inet-mc_list == NULL) return; + if (sk-sk_net != init_net) + return; + rtnl_lock(); while ((iml = inet-mc_list) != NULL) { struct in_device *in_dev; -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 14/17 net-2.6.26] [NETNS]: Process devinet ioctl in the correct namespace.
Add namespace parameter to devinet_ioctl and locate device inside it for state changes. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- include/linux/inetdevice.h |2 +- net/ipv4/af_inet.c |7 --- net/ipv4/devinet.c |6 +++--- net/ipv4/ipconfig.c|2 +- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index fc4e3db..da05ab4 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -129,7 +129,7 @@ extern int unregister_inetaddr_notifier(struct notifier_block *nb); extern struct net_device *ip_dev_find(struct net *net, __be32 addr); extern int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b); -extern int devinet_ioctl(unsigned int cmd, void __user *); +extern int devinet_ioctl(struct net *net, unsigned int cmd, void __user *); extern voiddevinet_init(void); extern struct in_device*inetdev_by_index(struct net *, int); extern __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 09ca529..c270080 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -784,6 +784,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock-sk; int err = 0; + struct net *net = sk-sk_net; switch (cmd) { case SIOCGSTAMP: @@ -795,12 +796,12 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCADDRT: case SIOCDELRT: case SIOCRTMSG: - err = ip_rt_ioctl(sk-sk_net, cmd, (void __user *)arg); + err = ip_rt_ioctl(net, cmd, (void __user *)arg); break; case SIOCDARP: case SIOCGARP: case SIOCSARP: - err = arp_ioctl(sk-sk_net, cmd, (void __user *)arg); + err = arp_ioctl(net, cmd, (void __user *)arg); break; case SIOCGIFADDR: case SIOCSIFADDR: @@ -813,7 +814,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCSIFPFLAGS: case SIOCGIFPFLAGS: case SIOCSIFFLAGS: - err = devinet_ioctl(cmd, (void __user *)arg); + err 
= devinet_ioctl(net, cmd, (void __user *)arg); break; default: if (sk-sk_prot-ioctl) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 963e711..f7e78b7 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -595,7 +595,7 @@ static __inline__ int inet_abc_len(__be32 addr) } -int devinet_ioctl(unsigned int cmd, void __user *arg) +int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) { struct ifreq ifr; struct sockaddr_in sin_orig; @@ -624,7 +624,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) *colon = 0; #ifdef CONFIG_KMOD - dev_load(init_net, ifr.ifr_name); + dev_load(net, ifr.ifr_name); #endif switch (cmd) { @@ -665,7 +665,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) rtnl_lock(); ret = -ENODEV; - if ((dev = __dev_get_by_name(init_net, ifr.ifr_name)) == NULL) + if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL) goto done; if (colon) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index a52b585..009d78f 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -291,7 +291,7 @@ static int __init ic_dev_ioctl(unsigned int cmd, struct ifreq *arg) mm_segment_t oldfs = get_fs(); set_fs(get_ds()); - res = devinet_ioctl(cmd, (struct ifreq __user *) arg); + res = devinet_ioctl(init_net, cmd, (struct ifreq __user *) arg); set_fs(oldfs); return res; } -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 4/17 net-2.6.26] [NETNS]: Disable inetaddr notifiers in namespaces other than initial.
ip_fib_init is kept enabled. It is already namespace-aware. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- drivers/net/bonding/bond_main.c |3 +++ drivers/net/via-velocity.c |3 +++ drivers/s390/net/qeth_main.c|3 +++ net/sctp/protocol.c |3 +++ 4 files changed, 12 insertions(+), 0 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 0942d82..9666434 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3511,6 +3511,9 @@ static int bond_inetaddr_event(struct notifier_block *this, unsigned long event, struct bonding *bond, *bond_next; struct vlan_entry *vlan, *vlan_next; + if (ifa-ifa_dev-dev-nd_net != init_net) + return NOTIFY_DONE; + list_for_each_entry_safe(bond, bond_next, bond_dev_list, bond_list) { if (bond-dev == event_dev) { switch (event) { diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c index c50fdee..1525e8a 100644 --- a/drivers/net/via-velocity.c +++ b/drivers/net/via-velocity.c @@ -3464,6 +3464,9 @@ static int velocity_netdev_event(struct notifier_block *nb, unsigned long notifi struct velocity_info *vptr; unsigned long flags; + if (dev-nd_net != init_net) + return NOTIFY_DONE; + spin_lock_irqsave(velocity_dev_list_lock, flags); list_for_each_entry(vptr, velocity_dev_list, list) { if (vptr-dev == dev) { diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c index 62606ce..d063e9e 100644 --- a/drivers/s390/net/qeth_main.c +++ b/drivers/s390/net/qeth_main.c @@ -8622,6 +8622,9 @@ qeth_ip_event(struct notifier_block *this, struct qeth_ipaddr *addr; struct qeth_card *card; + if (dev-nd_net != init_net) + return NOTIFY_DONE; + QETH_DBF_TEXT(trace,3,ipevent); card = qeth_get_card_from_dev(dev); if (!card) diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 22a1657..4475f7e 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -629,6 +629,9 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, struct 
sctp_sockaddr_entry *addr = NULL; struct sctp_sockaddr_entry *temp; + if (ifa-ifa_dev-dev-nd_net != init_net) + return NOTIFY_DONE; + switch (ev) { case NETDEV_UP: addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC); -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 8/17 net-2.6.26] [NETNS]: Enable inetdev_event notifier.
After all these preparations it is time to enable main IPv4 device initialization routine inside namespace. It is safe do this now. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/devinet.c |3 --- 1 files changed, 0 insertions(+), 3 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index f282b26..963e711 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1044,9 +1044,6 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, struct net_device *dev = ptr; struct in_device *in_dev = __in_dev_get_rtnl(dev); - if (dev-nd_net != init_net) - return NOTIFY_DONE; - ASSERT_RTNL(); if (!in_dev) { -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 16/17 net-2.6.26] [NETNS]: Enable IPv4 address manipulations inside namespace.
Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/devinet.c |9 - 1 files changed, 0 insertions(+), 9 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index f7e78b7..aa23d10 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -446,9 +446,6 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg ASSERT_RTNL(); - if (net != init_net) - return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); if (err 0) goto errout; @@ -560,9 +557,6 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg ASSERT_RTNL(); - if (net != init_net) - return -EINVAL; - ifa = rtm_to_ifaddr(net, nlh); if (IS_ERR(ifa)) return PTR_ERR(ifa); @@ -1169,9 +1163,6 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) struct in_ifaddr *ifa; int s_ip_idx, s_idx = cb-args[0]; - if (net != init_net) - return 0; - s_ip_idx = ip_idx = cb-args[1]; idx = 0; for_each_netdev(net, dev) { -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 17/17 net-2.6.26] [NETNS]: Process inet_select_addr inside a namespace.
The context is available from a network device passed in. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/devinet.c |3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index aa23d10..033670d 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -871,6 +871,7 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) { __be32 addr = 0; struct in_device *in_dev; + struct net *net = dev-nd_net; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); @@ -899,7 +900,7 @@ no_in_dev: */ read_lock(dev_base_lock); rcu_read_lock(); - for_each_netdev(init_net, dev) { + for_each_netdev(net, dev) { if ((in_dev = __in_dev_get_rcu(dev)) == NULL) continue; -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 15/17 net-2.6.26] [NETNS]: Enable all routing manipulation via netlink inside namespace.
Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/route.c | 16 1 files changed, 8 insertions(+), 8 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 5f67eba..79e2e8a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2702,9 +2702,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void int err; struct sk_buff *skb; - if (net != init_net) - return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); if (err 0) goto errout; @@ -2734,7 +2731,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void if (iif) { struct net_device *dev; - dev = __dev_get_by_index(init_net, iif); + dev = __dev_get_by_index(net, iif); if (dev == NULL) { err = -ENODEV; goto errout_free; @@ -2760,7 +2757,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void }, .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, }; - err = ip_route_output_key(init_net, rt, fl); + err = ip_route_output_key(net, rt, fl); } if (err) @@ -2771,11 +2768,11 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void rt-rt_flags |= RTCF_NOTIFY; err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh-nlmsg_seq, - RTM_NEWROUTE, 0, 0); + RTM_NEWROUTE, 0, 0); if (err = 0) goto errout_free; - err = rtnl_unicast(skb, init_net, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); errout: return err; @@ -2789,6 +2786,9 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) struct rtable *rt; int h, s_h; int idx, s_idx; + struct net *net; + + net = skb-sk-sk_net; s_h = cb-args[0]; if (s_h 0) @@ -2798,7 +2798,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock_bh(); for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; rt = rcu_dereference(rt-u.dst.rt_next), idx++) { - if (idx s_idx) + if (rt-u.dst.dev-nd_net != net || idx s_idx) continue; if (rt-rt_genid != atomic_read(rt_genid)) 
continue; -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 5/17 net-2.6.26] [NETNS]: Register neighbour table parameters in the correct namespace.
neigh_sysctl_register should register sysctl entries inside correct namespace to avoid naming conflict. Typical example is a loopback. Entries for it present in all namespaces. Required to make inetdev_event working. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/core/neighbour.c |3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 7bb6a9a..c895ad4 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2732,7 +2732,8 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name; neigh_path[NEIGH_CTL_PATH_PROTO].ctl_name = p_id; - t-sysctl_header = register_sysctl_paths(neigh_path, t-neigh_vars); + t-sysctl_header = + register_net_sysctl_table(p-net, neigh_path, t-neigh_vars); if (!t-sysctl_header) goto free_procname; -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/17 net-2.6.26] [NETFILTER]: Consolidate masq_inet_event and masq_device_event.
They do exactly the same job. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/netfilter/ipt_MASQUERADE.c | 14 ++ 1 files changed, 2 insertions(+), 12 deletions(-) diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index d80fee8..313b3fc 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -139,18 +139,8 @@ static int masq_inet_event(struct notifier_block *this, unsigned long event, void *ptr) { - const struct net_device *dev = ((struct in_ifaddr *)ptr)-ifa_dev-dev; - - if (event == NETDEV_DOWN) { - /* IP address was deleted. Search entire table for - conntracks which were associated with that device, - and forget them. */ - NF_CT_ASSERT(dev-ifindex != 0); - - nf_ct_iterate_cleanup(device_cmp, (void *)(long)dev-ifindex); - } - - return NOTIFY_DONE; + struct net_device *dev = ((struct in_ifaddr *)ptr)-ifa_dev-dev; + return masq_device_event(this, event, dev); } static struct notifier_block masq_dev_notifier = { -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/17 net-2.6.26] [IPV4]: Remove check for ifa->ifa_dev != NULL.
This is a callback registered on the inet address notifier chain. The check is useless as: - ifa->ifa_dev is always != NULL - similar checks are absent in all other notifiers. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/atm/clip.c |4 1 files changed, 0 insertions(+), 4 deletions(-) diff --git a/net/atm/clip.c b/net/atm/clip.c index 86b885e..dd96440 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -648,10 +648,6 @@ static int clip_inet_event(struct notifier_block *this, unsigned long event, struct in_device *in_dev; in_dev = ((struct in_ifaddr *)ifa)-ifa_dev; - if (!in_dev || !in_dev-dev) { - printk(KERN_WARNING clip_inet_event: no device\n); - return NOTIFY_DONE; - } /* * Transitions are of the down-change-up type, so it's sufficient to * handle the change on up. -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/17 net-2.6.26] [IPV4]: Remove ifa != NULL check.
This is a callback registered on the inet address notifier chain. The check is useless as: - ifa is always != NULL - similar checks are absent in all other notifiers. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- drivers/net/via-velocity.c | 22 ++ 1 files changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c index cc0addb..c50fdee 100644 --- a/drivers/net/via-velocity.c +++ b/drivers/net/via-velocity.c @@ -3460,21 +3460,19 @@ static int velocity_resume(struct pci_dev *pdev) static int velocity_netdev_event(struct notifier_block *nb, unsigned long notification, void *ptr) { struct in_ifaddr *ifa = (struct in_ifaddr *) ptr; + struct net_device *dev = ifa-ifa_dev-dev; + struct velocity_info *vptr; + unsigned long flags; - if (ifa) { - struct net_device *dev = ifa-ifa_dev-dev; - struct velocity_info *vptr; - unsigned long flags; - - spin_lock_irqsave(velocity_dev_list_lock, flags); - list_for_each_entry(vptr, velocity_dev_list, list) { - if (vptr-dev == dev) { - velocity_get_ip(vptr); - break; - } + spin_lock_irqsave(velocity_dev_list_lock, flags); + list_for_each_entry(vptr, velocity_dev_list, list) { + if (vptr-dev == dev) { + velocity_get_ip(vptr); + break; } - spin_unlock_irqrestore(velocity_dev_list_lock, flags); } + spin_unlock_irqrestore(velocity_dev_list_lock, flags); + return NOTIFY_DONE; } -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/17] Finish IPv4 infrastructure namespacing.
This set finally makes it possible to manipulate network devices inside a namespace and to configure them [via netlink]. 'route' is not yet supported (though the groundwork for it is in place) as it requires a socket. Additionally, better routing cache support is added. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Network namespace and tc?
Hello, Stephen! Namespaces are not fully implemented yet :) Right now we have only the basic infrastructure in the mainstream and, currently, we can't even run TCP in a different namespace :( We hope this will change very soon. These marks (net != init_net) are used to - mark places we need to modify - ensure that we do not break the initial namespace. Regards, Den On Wed, 2008-02-13 at 15:59 -0800, Stephen Hemminger wrote: It looks like tc filter won't work on alternate namespaces: /* Add/change/delete/get a filter node */ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { ... if (net != init_net) return -EINVAL; Haven't played with namespace virtualization yet, but what else is not supported? Where is this documented? -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [IPV4]: Remove warning in node_set_parent.
ugly :), but Acked-by: Denis V. Lunev [EMAIL PROTECTED] On Mon, 2008-02-11 at 11:48 -0800, Stephen Hemminger wrote: On Mon, 11 Feb 2008 11:47:17 +0300 Denis V. Lunev [EMAIL PROTECTED] wrote: net/ipv4/fib_trie.c: In function 'node_set_parent': net/ipv4/fib_trie.c:184: warning: assignment makes integer from pointer without a cast Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/fib_trie.c |7 --- 1 files changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index f5fba3f..1753cd4 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -177,10 +177,11 @@ static inline struct tnode *node_parent_rcu(struct node *node) return rcu_dereference(ret); } -static inline void node_set_parent(struct node *node, struct tnode *ptr) +static inline void node_set_parent(struct node *node, struct tnode *__ptr) { - rcu_assign_pointer(node-parent, - (unsigned long)ptr | NODE_TYPE(node)); + struct node *ptr; + ptr = (struct node *)((unsigned long)__ptr | NODE_TYPE(node)); + rcu_assign_pointer(node-parent, ptr); } static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i) No, this causes new warning from assigning pointer (ptr) to integer node-parent. Why not just change rcupdate.h to do the right thing. From a00f7cbf1c2f2282eced236e1e8b99b0fecd213a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger [EMAIL PROTECTED] Date: Mon, 11 Feb 2008 11:28:13 -0800 Subject: [PATCH] eliminate warnings when rcu_assign_pointer is used with unsigned long It is reasonable to use RCU with non-pointer values, and describe the optimization. Signed-off-by: Stephen Hemminger [EMAIL PROTECTED] --- include/linux/rcupdate.h | 13 +++-- 1 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 37a642c..c44ac87 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -172,14 +172,15 @@ struct rcu_head { * structure after the pointer assignment. 
More importantly, this * call documents which pointers will be dereferenced by RCU read-side * code. + * + * If value is the NULL (constant 0), then no barrier is needed. */ -#define rcu_assign_pointer(p, v) \ - ({ \ - if (!__builtin_constant_p(v) || \ - ((v) != NULL)) \ - smp_wmb(); \ - (p) = (v); \ +#define rcu_assign_pointer(p, v) \ + ({ \ + if (!(__builtin_constant_p(v) v))\ + smp_wmb(); \ + (p) = (v); \ }) /** -- 1.5.3.8 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] [IPV4]: Remove warning in node_set_parent.
net/ipv4/fib_trie.c: In function 'node_set_parent': net/ipv4/fib_trie.c:184: warning: assignment makes integer from pointer without a cast Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/fib_trie.c |7 --- 1 files changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index f5fba3f..1753cd4 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -177,10 +177,11 @@ static inline struct tnode *node_parent_rcu(struct node *node) return rcu_dereference(ret); } -static inline void node_set_parent(struct node *node, struct tnode *ptr) +static inline void node_set_parent(struct node *node, struct tnode *__ptr) { - rcu_assign_pointer(node-parent, - (unsigned long)ptr | NODE_TYPE(node)); + struct node *ptr; + ptr = (struct node *)((unsigned long)__ptr | NODE_TYPE(node)); + rcu_assign_pointer(node-parent, ptr); } static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i) -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: cgroup: limit network bandwidth
Hello, Andrea! I happened to see your patch on LWN (I missed it on netdev@) and have a few words to say about it. Maybe it is not too late. I missed my entire mailbox yesterday and have not followed the discussion. Please forgive me. Rate-limiting message receive is not a good idea at all. First, if we talk about i386, the most important resource is low memory. There is no more than 1 Gb of it. You suggest keeping it in use for longer than usual, and this usage will not reduce network traffic to the node in the UDP case. For TCP the situation is slightly better, but not by much. For a rather slow group with a lot of traffic you will just eat 64kb * Nsockets of receive buffers. So resource usage simply increases in that case. This is unfortunate. In order to do proper rate-limiting you need to calculate the memory used: - drop incoming packets early for UDP - manage the TCP window on the basis of the buffer memory used by the cgroup Regards, Den -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [NETNS] Remove unused member (dst_net) of dst_ops.
This was added by Daniel Lezcano [EMAIL PROTECTED] in commit d4fa26ff44e31c2636a985e3092e2cd55d8045de. It looks to me like preparatory stuff for IPv6 namespacing. I think this is not needed in 2.6.25 but will be required in 2.6.26 very soon. Regards, Den On Fri, 2008-02-08 at 13:24 +0200, Rami Rosen wrote: Hi, This patch removes the dst_net member (a pointer to struct net) of dst_ops (/include/net/dst.h). The current network namespace implementation does not use it at all. Denis - any comments ? Regards, Rami Rosen Signed-off-by: Rami Rosen [EMAIL PROTECTED] -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] [IPV6]: dst_entry leak in ip4ip6_err.
The result of ip_route_output is not assigned to the skb. This means that - it is leaked - a possible OOPS below when dereferencing skb->dst - no ICMP message is sent for this case Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv6/ip6_tunnel.c |1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 9031e52..cd94064 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -550,6 +550,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ip_rt_put(rt); goto out; } + skb2-dst = (struct dst_entry *)rt; } else { ip_rt_put(rt); if (ip_route_input(skb2, eiph-daddr, eiph-saddr, eiph-tos, -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] [IGMP]: Optimize kfree_skb in igmp_rcv.
Merge error paths inside igmp_rcv. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/igmp.c | 13 ++--- 1 files changed, 6 insertions(+), 7 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index fe2e6cd..d3f34a7 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -922,13 +922,11 @@ int igmp_rcv(struct sk_buff *skb) struct in_device *in_dev = in_dev_get(skb-dev); int len = skb-len; - if (in_dev==NULL) { - kfree_skb(skb); - return 0; - } + if (in_dev == NULL) + goto drop; if (!pskb_may_pull(skb, sizeof(struct igmphdr))) - goto drop; + goto drop_ref; switch (skb-ip_summed) { case CHECKSUM_COMPLETE: @@ -938,7 +936,7 @@ int igmp_rcv(struct sk_buff *skb) case CHECKSUM_NONE: skb-csum = 0; if (__skb_checksum_complete(skb)) - goto drop; + goto drop_ref; } ih = igmp_hdr(skb); @@ -972,8 +970,9 @@ int igmp_rcv(struct sk_buff *skb) break; } -drop: +drop_ref: in_dev_put(in_dev); +drop: kfree_skb(skb); return 0; } -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 17/17] [NETNS]: Process inet_select_addr inside a namespace.
The context is available from a network device passed in. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/devinet.c |4 +++- 1 files changed, 3 insertions(+), 1 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index aa23d10..d06a4e6 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -871,12 +871,14 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) { __be32 addr = 0; struct in_device *in_dev; + struct net *net; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); if (!in_dev) goto no_in_dev; + net = dev-nd_net; for_primary_ifa(in_dev) { if (ifa-ifa_scope scope) continue; @@ -899,7 +901,7 @@ no_in_dev: */ read_lock(dev_base_lock); rcu_read_lock(); - for_each_netdev(init_net, dev) { + for_each_netdev(net, dev) { if ((in_dev = __in_dev_get_rcu(dev)) == NULL) continue; -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 15/17] [NETNS]: Enable all routing manipulation via netlink inside namespace.
Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/route.c | 16 1 files changed, 8 insertions(+), 8 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8a31e33..92ff622 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2672,9 +2672,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void int err; struct sk_buff *skb; - if (net != init_net) - return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); if (err 0) goto errout; @@ -2704,7 +2701,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void if (iif) { struct net_device *dev; - dev = __dev_get_by_index(init_net, iif); + dev = __dev_get_by_index(net, iif); if (dev == NULL) { err = -ENODEV; goto errout_free; @@ -2730,7 +2727,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void }, .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, }; - err = ip_route_output_key(init_net, rt, fl); + err = ip_route_output_key(net, rt, fl); } if (err) @@ -2741,11 +2738,11 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void rt-rt_flags |= RTCF_NOTIFY; err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh-nlmsg_seq, - RTM_NEWROUTE, 0, 0); + RTM_NEWROUTE, 0, 0); if (err = 0) goto errout_free; - err = rtnl_unicast(skb, init_net, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); errout: return err; @@ -2759,6 +2756,9 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) struct rtable *rt; int h, s_h; int idx, s_idx; + struct net *net; + + net = skb-sk-sk_net; s_h = cb-args[0]; if (s_h 0) @@ -2768,7 +2768,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock_bh(); for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; rt = rcu_dereference(rt-u.dst.rt_next), idx++) { - if (idx s_idx) + if (rt-u.dst.dev-nd_net != net || idx s_idx) continue; if (rt-rt_genid != atomic_read(rt_genid)) 
continue; -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 14/17] [NETNS]: Process devinet ioctl in the correct namespace.
Add namespace parameter to devinet_ioctl and locate device inside it for a state changes. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- include/linux/inetdevice.h |2 +- net/ipv4/af_inet.c |7 --- net/ipv4/devinet.c |6 +++--- net/ipv4/ipconfig.c|2 +- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index fc4e3db..da05ab4 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -129,7 +129,7 @@ extern int unregister_inetaddr_notifier(struct notifier_block *nb); extern struct net_device *ip_dev_find(struct net *net, __be32 addr); extern int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b); -extern int devinet_ioctl(unsigned int cmd, void __user *); +extern int devinet_ioctl(struct net *net, unsigned int cmd, void __user *); extern voiddevinet_init(void); extern struct in_device*inetdev_by_index(struct net *, int); extern __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 09ca529..c270080 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -784,6 +784,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock-sk; int err = 0; + struct net *net = sk-sk_net; switch (cmd) { case SIOCGSTAMP: @@ -795,12 +796,12 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCADDRT: case SIOCDELRT: case SIOCRTMSG: - err = ip_rt_ioctl(sk-sk_net, cmd, (void __user *)arg); + err = ip_rt_ioctl(net, cmd, (void __user *)arg); break; case SIOCDARP: case SIOCGARP: case SIOCSARP: - err = arp_ioctl(sk-sk_net, cmd, (void __user *)arg); + err = arp_ioctl(net, cmd, (void __user *)arg); break; case SIOCGIFADDR: case SIOCSIFADDR: @@ -813,7 +814,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCSIFPFLAGS: case SIOCGIFPFLAGS: case SIOCSIFFLAGS: - err = devinet_ioctl(cmd, (void __user *)arg); + 
err = devinet_ioctl(net, cmd, (void __user *)arg); break; default: if (sk-sk_prot-ioctl) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index f282b26..a06fcae 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -595,7 +595,7 @@ static __inline__ int inet_abc_len(__be32 addr) } -int devinet_ioctl(unsigned int cmd, void __user *arg) +int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) { struct ifreq ifr; struct sockaddr_in sin_orig; @@ -624,7 +624,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) *colon = 0; #ifdef CONFIG_KMOD - dev_load(init_net, ifr.ifr_name); + dev_load(net, ifr.ifr_name); #endif switch (cmd) { @@ -665,7 +665,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) rtnl_lock(); ret = -ENODEV; - if ((dev = __dev_get_by_name(init_net, ifr.ifr_name)) == NULL) + if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL) goto done; if (colon) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index a52b585..009d78f 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -291,7 +291,7 @@ static int __init ic_dev_ioctl(unsigned int cmd, struct ifreq *arg) mm_segment_t oldfs = get_fs(); set_fs(get_ds()); - res = devinet_ioctl(cmd, (struct ifreq __user *) arg); + res = devinet_ioctl(init_net, cmd, (struct ifreq __user *) arg); set_fs(oldfs); return res; } -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 13/17] [NETNS]: Register /proc/net/rt_cache for each namespace.
Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/route.c | 24 +--- 1 files changed, 21 insertions(+), 3 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index cc002d8..84da794 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -545,7 +545,7 @@ static int ip_rt_acct_read(char *buffer, char **start, off_t offset, } #endif -static __init int ip_rt_proc_init(struct net *net) +static int __net_init ip_rt_do_proc_init(struct net *net) { struct proc_dir_entry *pde; @@ -577,8 +577,26 @@ err2: err1: return -ENOMEM; } + +static void __net_exit ip_rt_do_proc_exit(struct net *net) +{ + remove_proc_entry(rt_cache, net-proc_net_stat); + remove_proc_entry(rt_cache, net-proc_net); + remove_proc_entry(rt_acct, net-proc_net); +} + +static struct pernet_operations ip_rt_proc_ops __net_initdata = { + .init = ip_rt_do_proc_init, + .exit = ip_rt_do_proc_exit, +}; + +static int __init ip_rt_proc_init(void) +{ + return register_pernet_subsys(ip_rt_proc_ops); +} + #else -static inline int ip_rt_proc_init(struct net *net) +static inline int ip_rt_proc_init(void) { return 0; } @@ -3056,7 +3074,7 @@ int __init ip_rt_init(void) ip_rt_secret_interval; add_timer(rt_secret_timer); - if (ip_rt_proc_init(init_net)) + if (ip_rt_proc_init()) printk(KERN_ERR Unable to create route proc files\n); #ifdef CONFIG_XFRM xfrm_init(); -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 10/17] [NETNS]: Process ip_rt_redirect in the correct namespace.
Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/route.c |7 +-- 1 files changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8842ecb..8a31e33 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1132,10 +1132,12 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, __be32 skeys[2] = { saddr, 0 }; int ikeys[2] = { dev-ifindex, 0 }; struct netevent_redirect netevent; + struct net *net; if (!in_dev) return; + net = dev-nd_net; if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) || ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) || ipv4_is_zeronet(new_gw)) @@ -1147,7 +1149,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, if (IN_DEV_SEC_REDIRECTS(in_dev) ip_fib_check_default(new_gw, dev)) goto reject_redirect; } else { - if (inet_addr_type(init_net, new_gw) != RTN_UNICAST) + if (inet_addr_type(net, new_gw) != RTN_UNICAST) goto reject_redirect; } @@ -1165,7 +1167,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rth-fl.fl4_src != skeys[i] || rth-fl.oif != ikeys[k] || rth-fl.iif != 0 || - rth-rt_genid != atomic_read(rt_genid)) { + rth-rt_genid != atomic_read(rt_genid) || + rth-u.dst.dev-nd_net != net) { rthp = rth-u.dst.rt_next; continue; } -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 8/17] [NETNS]: Enable inetdev_event notifier.
After all these preparations it is time to enable the main IPv4 device initialization routine inside a namespace. It is safe to do this now. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/devinet.c |3 --- 1 files changed, 0 insertions(+), 3 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index a06fcae..f7e78b7 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1044,9 +1044,6 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, struct net_device *dev = ptr; struct in_device *in_dev = __in_dev_get_rtnl(dev); - if (dev-nd_net != init_net) - return NOTIFY_DONE; - ASSERT_RTNL(); if (!in_dev) { -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 4/17] [NETNS]: Disable address notifiers in namespaces other than initial.
ip_fib_init is keeped enabled. It is already namespace-aware. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- drivers/net/bonding/bond_main.c |3 +++ drivers/net/via-velocity.c |3 +++ drivers/s390/net/qeth_main.c|3 +++ net/sctp/protocol.c |3 +++ 4 files changed, 12 insertions(+), 0 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 0942d82..9666434 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3511,6 +3511,9 @@ static int bond_inetaddr_event(struct notifier_block *this, unsigned long event, struct bonding *bond, *bond_next; struct vlan_entry *vlan, *vlan_next; + if (ifa-ifa_dev-dev-nd_net != init_net) + return NOTIFY_DONE; + list_for_each_entry_safe(bond, bond_next, bond_dev_list, bond_list) { if (bond-dev == event_dev) { switch (event) { diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c index 7ff4509..d659834 100644 --- a/drivers/net/via-velocity.c +++ b/drivers/net/via-velocity.c @@ -3464,6 +3464,9 @@ static int velocity_netdev_event(struct notifier_block *nb, unsigned long notifi struct velocity_info *vptr; unsigned long flags; + if (dev-nd_net != init_net) + return NOTIFY_DONE; + spin_lock_irqsave(velocity_dev_list_lock, flags); list_for_each_entry(vptr, velocity_dev_list, list) { if (vptr-dev == dev) { diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c index 62606ce..d063e9e 100644 --- a/drivers/s390/net/qeth_main.c +++ b/drivers/s390/net/qeth_main.c @@ -8622,6 +8622,9 @@ qeth_ip_event(struct notifier_block *this, struct qeth_ipaddr *addr; struct qeth_card *card; + if (dev-nd_net != init_net) + return NOTIFY_DONE; + QETH_DBF_TEXT(trace,3,ipevent); card = qeth_get_card_from_dev(dev); if (!card) diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 1339742..20f7e4a 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -629,6 +629,9 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, struct 
sctp_sockaddr_entry *addr = NULL; struct sctp_sockaddr_entry *temp; + if (ifa-ifa_dev-dev-nd_net != init_net) + return NOTIFY_DONE; + switch (ev) { case NETDEV_UP: addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC); -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 6/17] [NETNS]: Default arp parameters lookup.
Default ARP parameters should be findable regardless of the context. This is required to make inetdev_event work. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/core/neighbour.c |4 +--- 1 files changed, 1 insertions(+), 3 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 1ed7b0a..ea44b8d 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1281,9 +1281,7 @@ static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, struct neigh_parms *p; for (p = tbl-parms; p; p = p-next) { - if (p-net != net) - continue; - if ((p-dev p-dev-ifindex == ifindex) || + if ((p-dev p-dev-ifindex == ifindex p-net == net) || (!p-dev !ifindex)) return p; } -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 5/17] [NETNS]: Register neighbour parameters of the net device in the correct namespace.
neigh_sysctl_register should register sysctl entries inside the correct namespace to avoid naming conflicts. A typical example is the loopback device: entries for it are present in all namespaces. This is required to make inetdev_event work. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/core/neighbour.c |3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index a16cf1e..1ed7b0a 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2738,7 +2738,8 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name; neigh_path[NEIGH_CTL_PATH_PROTO].ctl_name = p_id; - t-sysctl_header = register_sysctl_paths(neigh_path, t-neigh_vars); + t-sysctl_header = + register_net_sysctl_table(p-net, neigh_path, t-neigh_vars); if (!t-sysctl_header) goto free_procname; -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/17] [IPV4]: Consolidate masq_inet_event and masq_device_event.
They do exactly the same job. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/netfilter/ipt_MASQUERADE.c | 14 ++ 1 files changed, 2 insertions(+), 12 deletions(-) diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index d80fee8..313b3fc 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -139,18 +139,8 @@ static int masq_inet_event(struct notifier_block *this, unsigned long event, void *ptr) { - const struct net_device *dev = ((struct in_ifaddr *)ptr)-ifa_dev-dev; - - if (event == NETDEV_DOWN) { - /* IP address was deleted. Search entire table for - conntracks which were associated with that device, - and forget them. */ - NF_CT_ASSERT(dev-ifindex != 0); - - nf_ct_iterate_cleanup(device_cmp, (void *)(long)dev-ifindex); - } - - return NOTIFY_DONE; + struct net_device *dev = ((struct in_ifaddr *)ptr)-ifa_dev-dev; + return masq_device_event(this, event, dev); } static struct notifier_block masq_dev_notifier = { -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/17] [IPV4]: Remove ifa != NULL check.
This is a callback registered on the inet address notifier chain. The check is useless as: - ifa is always != NULL - similar checks are absent in all other notifiers. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- drivers/net/via-velocity.c | 22 ++ 1 files changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c index 8c9fb82..7ff4509 100644 --- a/drivers/net/via-velocity.c +++ b/drivers/net/via-velocity.c @@ -3460,21 +3460,19 @@ static int velocity_resume(struct pci_dev *pdev) static int velocity_netdev_event(struct notifier_block *nb, unsigned long notification, void *ptr) { struct in_ifaddr *ifa = (struct in_ifaddr *) ptr; + struct net_device *dev = ifa-ifa_dev-dev; + struct velocity_info *vptr; + unsigned long flags; - if (ifa) { - struct net_device *dev = ifa-ifa_dev-dev; - struct velocity_info *vptr; - unsigned long flags; - - spin_lock_irqsave(velocity_dev_list_lock, flags); - list_for_each_entry(vptr, velocity_dev_list, list) { - if (vptr-dev == dev) { - velocity_get_ip(vptr); - break; - } + spin_lock_irqsave(velocity_dev_list_lock, flags); + list_for_each_entry(vptr, velocity_dev_list, list) { + if (vptr-dev == dev) { + velocity_get_ip(vptr); + break; } - spin_unlock_irqrestore(velocity_dev_list_lock, flags); } + spin_unlock_irqrestore(velocity_dev_list_lock, flags); + return NOTIFY_DONE; } -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/17] Finish IPv4 infrastructure namespacing
This set finally makes it possible to manipulate network devices inside a namespace and to configure them [via netlink]. 'route' is not yet supported (though the groundwork for it is in place). Additionally, better routing cache support is added. By the way, working ICMP is only a couple of patches away after this set :) Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/17] [IPV4]: Remove check for ifa->ifa_dev != NULL.
This is a callback registered on the inet address notifier chain. The check is useless as: - ifa->ifa_dev is always != NULL - similar checks are absent in all other notifiers. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/atm/clip.c |4 1 files changed, 0 insertions(+), 4 deletions(-) diff --git a/net/atm/clip.c b/net/atm/clip.c index 86b885e..dd96440 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -648,10 +648,6 @@ static int clip_inet_event(struct notifier_block *this, unsigned long event, struct in_device *in_dev; in_dev = ((struct in_ifaddr *)ifa)-ifa_dev; - if (!in_dev || !in_dev-dev) { - printk(KERN_WARNING clip_inet_event: no device\n); - return NOTIFY_DONE; - } /* * Transitions are of the down-change-up type, so it's sufficient to * handle the change on up. -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 9/17] [NETNS]: DST cleanup routines should be called inside namespace.
Device inside the namespace can be started and downed. So, active routing cache should be cleaned up on device stop. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/core/dst.c |3 --- 1 files changed, 0 insertions(+), 3 deletions(-) diff --git a/net/core/dst.c b/net/core/dst.c index 7deef48..3a01a81 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -295,9 +295,6 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, void struct net_device *dev = ptr; struct dst_entry *dst, *last = NULL; - if (dev-nd_net != init_net) - return NOTIFY_DONE; - switch (event) { case NETDEV_UNREGISTER: case NETDEV_DOWN: -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 7/17] [NETNS]: Disable multicast configuration inside namespace.
Do not calls hooks from device notifiers and disallow configuration from ioctl/netlink layer. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/igmp.c | 39 +++ 1 files changed, 39 insertions(+), 0 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 994648b..fe2e6cd 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1199,6 +1199,9 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) ASSERT_RTNL(); + if (in_dev-dev-nd_net != init_net) + return; + for (im=in_dev-mc_list; im; im=im-next) { if (im-multiaddr == addr) { im-users++; @@ -1278,6 +1281,9 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) ASSERT_RTNL(); + if (in_dev-dev-nd_net != init_net) + return; + for (ip=in_dev-mc_list; (i=*ip)!=NULL; ip=i-next) { if (i-multiaddr==addr) { if (--i-users == 0) { @@ -1305,6 +1311,9 @@ void ip_mc_down(struct in_device *in_dev) ASSERT_RTNL(); + if (in_dev-dev-nd_net != init_net) + return; + for (i=in_dev-mc_list; i; i=i-next) igmp_group_dropped(i); @@ -1325,6 +1334,9 @@ void ip_mc_init_dev(struct in_device *in_dev) { ASSERT_RTNL(); + if (in_dev-dev-nd_net != init_net) + return; + in_dev-mc_tomb = NULL; #ifdef CONFIG_IP_MULTICAST in_dev-mr_gq_running = 0; @@ -1348,6 +1360,9 @@ void ip_mc_up(struct in_device *in_dev) ASSERT_RTNL(); + if (in_dev-dev-nd_net != init_net) + return; + ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); for (i=in_dev-mc_list; i; i=i-next) @@ -1364,6 +1379,9 @@ void ip_mc_destroy_dev(struct in_device *in_dev) ASSERT_RTNL(); + if (in_dev-dev-nd_net != init_net) + return; + /* Deactivate timers */ ip_mc_down(in_dev); @@ -1745,6 +1763,9 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) if (!ipv4_is_multicast(addr)) return -EINVAL; + if (sk-sk_net != init_net) + return -EPROTONOSUPPORT; + rtnl_lock(); in_dev = ip_mc_find_dev(imr); @@ -1813,6 +1834,9 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) u32 ifindex; int ret = -EADDRNOTAVAIL; + if (sk-sk_net != init_net) + return 
-EPROTONOSUPPORT; + rtnl_lock(); in_dev = ip_mc_find_dev(imr); ifindex = imr-imr_ifindex; @@ -1858,6 +1882,9 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct if (!ipv4_is_multicast(addr)) return -EINVAL; + if (sk-sk_net != init_net) + return -EPROTONOSUPPORT; + rtnl_lock(); imr.imr_multiaddr.s_addr = mreqs-imr_multiaddr; @@ -1991,6 +2018,9 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) msf-imsf_fmode != MCAST_EXCLUDE) return -EINVAL; + if (sk-sk_net != init_net) + return -EPROTONOSUPPORT; + rtnl_lock(); imr.imr_multiaddr.s_addr = msf-imsf_multiaddr; @@ -2071,6 +2101,9 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, if (!ipv4_is_multicast(addr)) return -EINVAL; + if (sk-sk_net != init_net) + return -EPROTONOSUPPORT; + rtnl_lock(); imr.imr_multiaddr.s_addr = msf-imsf_multiaddr; @@ -2133,6 +2166,9 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, if (!ipv4_is_multicast(addr)) return -EINVAL; + if (sk-sk_net != init_net) + return -EPROTONOSUPPORT; + rtnl_lock(); err = -EADDRNOTAVAIL; @@ -2217,6 +2253,9 @@ void ip_mc_drop_socket(struct sock *sk) if (inet-mc_list == NULL) return; + if (sk-sk_net != init_net) + return; + rtnl_lock(); while ((iml = inet-mc_list) != NULL) { struct in_device *in_dev; -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 11/17] [IPV4]: rt_cache_get_next should take rt_genid into account.
In the other case /proc/net/rt_cache will look inconsistent in respect to genid. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] Acked-by: Alexey Kuznetsov [EMAIL PROTECTED] --- net/ipv4/route.c | 18 +- 1 files changed, 13 insertions(+), 5 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 92ff622..b03de57 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -294,7 +294,8 @@ static struct rtable *rt_cache_get_first(struct rt_cache_iter_state *st) return r; } -static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct rtable *r) +static struct rtable *__rt_cache_get_next(struct rt_cache_iter_state *st, + struct rtable *r) { r = r-u.dst.rt_next; while (!r) { @@ -307,16 +308,23 @@ static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct r return rcu_dereference(r); } +static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, + struct rtable *r) +{ + while ((r = __rt_cache_get_next(st, r)) != NULL) { + if (r-rt_genid == st-genid) + break; + } + return r; +} + static struct rtable *rt_cache_get_idx(struct rt_cache_iter_state *st, loff_t pos) { struct rtable *r = rt_cache_get_first(st); if (r) - while (pos (r = rt_cache_get_next(st, r))) { - if (r-rt_genid != st-genid) - continue; + while (pos (r = rt_cache_get_next(st, r))) --pos; - } return pos ? NULL : r; } -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 12/17] [NETNS]: Process /proc/net/rt_cache inside a namespace.
Show routing cache for a particular namespace only. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/route.c | 10 +++--- 1 files changed, 7 insertions(+), 3 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b03de57..cc002d8 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -273,6 +273,7 @@ static unsigned int rt_hash_code(u32 daddr, u32 saddr) #ifdef CONFIG_PROC_FS struct rt_cache_iter_state { + struct seq_net_private p; int bucket; int genid; }; @@ -285,7 +286,8 @@ static struct rtable *rt_cache_get_first(struct rt_cache_iter_state *st) rcu_read_lock_bh(); r = rcu_dereference(rt_hash_table[st-bucket].chain); while (r) { - if (r-rt_genid == st-genid) + if (r-u.dst.dev-nd_net == st-p.net + r-rt_genid == st-genid) return r; r = rcu_dereference(r-u.dst.rt_next); } @@ -312,6 +314,8 @@ static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct rtable *r) { while ((r = __rt_cache_get_next(st, r)) != NULL) { + if (r-u.dst.dev-nd_net != st-p.net) + continue; if (r-rt_genid == st-genid) break; } @@ -398,7 +402,7 @@ static const struct seq_operations rt_cache_seq_ops = { static int rt_cache_seq_open(struct inode *inode, struct file *file) { - return seq_open_private(file, rt_cache_seq_ops, + return seq_open_net(inode, file, rt_cache_seq_ops, sizeof(struct rt_cache_iter_state)); } @@ -407,7 +411,7 @@ static const struct file_operations rt_cache_seq_fops = { .open= rt_cache_seq_open, .read= seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 16/17] [NETNS]: Enable IPv4 address manipulations inside namespace.
Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/devinet.c |9 - 1 files changed, 0 insertions(+), 9 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index f7e78b7..aa23d10 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -446,9 +446,6 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg ASSERT_RTNL(); - if (net != init_net) - return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); if (err 0) goto errout; @@ -560,9 +557,6 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg ASSERT_RTNL(); - if (net != init_net) - return -EINVAL; - ifa = rtm_to_ifaddr(net, nlh); if (IS_ERR(ifa)) return PTR_ERR(ifa); @@ -1169,9 +1163,6 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) struct in_ifaddr *ifa; int s_ip_idx, s_idx = cb-args[0]; - if (net != init_net) - return 0; - s_ip_idx = ip_idx = cb-args[1]; idx = 0; for_each_netdev(net, dev) { -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/17] Finish IPv4 infrastructure namespacing
David Miller wrote: What part of "no new features" did you not understand? Oops, again :( Sorry, I missed that thread. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] [IPV4]: Formatting fix for /proc/net/fib_trie.
The line in the /proc/net/fib_trie for route with TOS specified - has extra \n at the end - does not have a space after route scope like below. |-- 1.1.1.1 /32 universe UNICASTtos =1 Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/fib_trie.c |3 +-- 1 files changed, 1 insertions(+), 2 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 35851c9..f5fba3f 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2431,8 +2431,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) rtn_type(buf2, sizeof(buf2), fa-fa_type)); if (fa-fa_tos) - seq_printf(seq, tos =%d\n, - fa-fa_tos); + seq_printf(seq, tos=%d, fa-fa_tos); seq_putc(seq, '\n'); } } -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [IPV4]: Fix compiler error with CONFIG_PROC_FS=n
Johann Felix Soden wrote: From: Johann Felix Soden [EMAIL PROTECTED] Handle CONFIG_PROC_FS=n in net/ipv4/fib_frontend.c because: net/ipv4/fib_frontend.c: In function 'fib_net_init': net/ipv4/fib_frontend.c:1032: error: implicit declaration of function 'fib_proc_init' net/ipv4/fib_frontend.c: In function 'fib_net_exit': net/ipv4/fib_frontend.c:1047: error: implicit declaration of function 'fib_proc_exit' Signed-off-by: Johann Felix Soden [EMAIL PROTECTED] The fix from Li Zefan is already committed. (cc8274f50f2ad9a97a837451f63a0a3e65f7f490) -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/3] [RAW]: proc output cleanups.
Yesterday Adrian Bunk noticed that the commit commit 42a73808ed4f30b739eb52bcbb33a02fe62ceef5 Author: Pavel Emelyanov [EMAIL PROTECTED] Date: Mon Nov 19 22:38:33 2007 -0800 is somewhat strange. Basically, the commit is obviously wrong, as it makes the content of /proc/net/raw6 incorrect. This series of patches fixes the original problem and slightly cleans up the surrounding code. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/3] [RAW]: Cleanup IPv4 raw_seq_show.
There is no need to use 128 bytes on the stack at all. Clean the code in the IPv6 style. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/raw.c | 24 +++- 1 files changed, 7 insertions(+), 17 deletions(-) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 507cbfe..830f19e 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -927,7 +927,7 @@ void raw_seq_stop(struct seq_file *seq, void *v) } EXPORT_SYMBOL_GPL(raw_seq_stop); -static __inline__ char *get_raw_sock(struct sock *sp, char *tmpbuf, int i) +static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) { struct inet_sock *inet = inet_sk(sp); __be32 dest = inet-daddr, @@ -935,33 +935,23 @@ static __inline__ char *get_raw_sock(struct sock *sp, char *tmpbuf, int i) __u16 destp = 0, srcp = inet-num; - sprintf(tmpbuf, %4d: %08X:%04X %08X:%04X + seq_printf(seq, %4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d, i, src, srcp, dest, destp, sp-sk_state, atomic_read(sp-sk_wmem_alloc), atomic_read(sp-sk_rmem_alloc), 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), atomic_read(sp-sk_refcnt), sp, atomic_read(sp-sk_drops)); - return tmpbuf; } -#define TMPSZ 128 - static int raw_seq_show(struct seq_file *seq, void *v) { - char tmpbuf[TMPSZ+1]; - if (v == SEQ_START_TOKEN) - seq_printf(seq, %-*s\n, TMPSZ-1, -sl local_address rem_address st tx_queue - rx_queue tr tm-when retrnsmt uid timeout - inode drops); - else { - struct raw_iter_state *state = raw_seq_private(seq); - - seq_printf(seq, %-*s\n, TMPSZ-1, - get_raw_sock(v, tmpbuf, state-bucket)); - } + seq_printf(seq, sl local_address rem_address st tx_queue + rx_queue tr tm-when retrnsmt uid timeout + inode drops\n); + else + raw_sock_seq_show(seq, v, raw_seq_private(seq)-bucket); return 0; } -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/3] [RAW]: Wrong content of the /proc/net/raw6.
The address of IPv6 raw sockets was shown in the wrong format, from IPv4 ones. The problem has been introduced by the commit 42a73808ed4f30b739eb52bcbb33a02fe62ceef5 Author: Pavel Emelyanov [EMAIL PROTECTED] Date: Mon Nov 19 22:38:33 2007 -0800 Thanks to Adrian Bunk who originally noticed the problem. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- include/net/raw.h |3 ++- net/ipv4/raw.c|8 net/ipv6/raw.c|2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/net/raw.h b/include/net/raw.h index c7ea7a2..1828f81 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -48,7 +48,8 @@ struct raw_iter_state { void *raw_seq_start(struct seq_file *seq, loff_t *pos); void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos); void raw_seq_stop(struct seq_file *seq, void *v); -int raw_seq_open(struct inode *ino, struct file *file, struct raw_hashinfo *h); +int raw_seq_open(struct inode *ino, struct file *file, +struct raw_hashinfo *h, const struct seq_operations *ops); #endif diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 830f19e..a3002fe 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -962,13 +962,13 @@ static const struct seq_operations raw_seq_ops = { .show = raw_seq_show, }; -int raw_seq_open(struct inode *ino, struct file *file, struct raw_hashinfo *h) +int raw_seq_open(struct inode *ino, struct file *file, +struct raw_hashinfo *h, const struct seq_operations *ops) { int err; struct raw_iter_state *i; - err = seq_open_net(ino, file, raw_seq_ops, - sizeof(struct raw_iter_state)); + err = seq_open_net(ino, file, ops, sizeof(struct raw_iter_state)); if (err 0) return err; @@ -980,7 +980,7 @@ EXPORT_SYMBOL_GPL(raw_seq_open); static int raw_v4_seq_open(struct inode *inode, struct file *file) { - return raw_seq_open(inode, file, raw_v4_hashinfo); + return raw_seq_open(inode, file, raw_v4_hashinfo, raw_seq_ops); } static const struct file_operations raw_seq_fops = { diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a2cf499..8897ccf 100644 
--- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1262,7 +1262,7 @@ static const struct seq_operations raw6_seq_ops = { static int raw6_seq_open(struct inode *inode, struct file *file) { - return raw_seq_open(inode, file, raw_v6_hashinfo); + return raw_seq_open(inode, file, raw_v6_hashinfo, raw6_seq_ops); } static const struct file_operations raw6_seq_fops = { -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/3] [RAW]: Family check in the /proc/net/raw[6] is extra.
Different hashtables are used for IPv6 and IPv4 raw sockets, so no need to check the socket family in the iterator over hashtables. Clean this out. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- include/net/raw.h |4 +--- net/ipv4/raw.c| 12 net/ipv6/raw.c|2 +- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/include/net/raw.h b/include/net/raw.h index cca81d8..c7ea7a2 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -41,7 +41,6 @@ extern void raw_proc_exit(void); struct raw_iter_state { struct seq_net_private p; int bucket; - unsigned short family; struct raw_hashinfo *h; }; @@ -49,8 +48,7 @@ struct raw_iter_state { void *raw_seq_start(struct seq_file *seq, loff_t *pos); void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos); void raw_seq_stop(struct seq_file *seq, void *v); -int raw_seq_open(struct inode *ino, struct file *file, struct raw_hashinfo *h, - unsigned short family); +int raw_seq_open(struct inode *ino, struct file *file, struct raw_hashinfo *h); #endif diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index f863c3d..507cbfe 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -862,8 +862,7 @@ static struct sock *raw_get_first(struct seq_file *seq) struct hlist_node *node; sk_for_each(sk, node, state-h-ht[state-bucket]) - if (sk-sk_net == state-p.net - sk-sk_family == state-family) + if (sk-sk_net == state-p.net) goto found; } sk = NULL; @@ -879,8 +878,7 @@ static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk) sk = sk_next(sk); try_again: ; - } while (sk sk-sk_net != state-p.net - sk-sk_family != state-family); + } while (sk sk-sk_net != state-p.net); if (!sk ++state-bucket RAW_HTABLE_SIZE) { sk = sk_head(state-h-ht[state-bucket]); @@ -974,8 +972,7 @@ static const struct seq_operations raw_seq_ops = { .show = raw_seq_show, }; -int raw_seq_open(struct inode *ino, struct file *file, struct raw_hashinfo *h, - unsigned short family) +int raw_seq_open(struct inode *ino, struct file *file, struct raw_hashinfo *h) 
{ int err; struct raw_iter_state *i; @@ -987,14 +984,13 @@ int raw_seq_open(struct inode *ino, struct file *file, struct raw_hashinfo *h, i = raw_seq_private((struct seq_file *)file-private_data); i-h = h; - i-family = family; return 0; } EXPORT_SYMBOL_GPL(raw_seq_open); static int raw_v4_seq_open(struct inode *inode, struct file *file) { - return raw_seq_open(inode, file, raw_v4_hashinfo, PF_INET); + return raw_seq_open(inode, file, raw_v4_hashinfo); } static const struct file_operations raw_seq_fops = { diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index d61c63d..a2cf499 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1262,7 +1262,7 @@ static const struct seq_operations raw6_seq_ops = { static int raw6_seq_open(struct inode *inode, struct file *file) { - return raw_seq_open(inode, file, raw_v6_hashinfo, PF_INET6); + return raw_seq_open(inode, file, raw_v6_hashinfo); } static const struct file_operations raw6_seq_fops = { -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/6] preparations to enable netdevice notifiers inside a namespace (resend)
Here are some preparations and cleanups to enable network device/inet address notifiers inside a namespace. This set of patches was originally sent last Friday. One cleanup patch from the original series has been dropped as incorrect, thanks to Daniel Lezcano. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/6] [IPV4]: Fix memory leak on error path during FIB initialization.
net-ipv4.fib_table_hash is not freed when fib4_rules_init failed. The problem has been introduced by the following commit. commit c8050bf6d84785a7edd2e81591e8f833231477e8 Author: Denis V. Lunev [EMAIL PROTECTED] Date: Thu Jan 10 03:28:24 2008 -0800 Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/fib_frontend.c | 10 +- 1 files changed, 9 insertions(+), 1 deletions(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index d282618..d0507f4 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -975,6 +975,7 @@ static struct notifier_block fib_netdev_notifier = { static int __net_init ip_fib_net_init(struct net *net) { + int err; unsigned int i; net-ipv4.fib_table_hash = kzalloc( @@ -985,7 +986,14 @@ static int __net_init ip_fib_net_init(struct net *net) for (i = 0; i FIB_TABLE_HASHSZ; i++) INIT_HLIST_HEAD(net-ipv4.fib_table_hash[i]); - return fib4_rules_init(net); + err = fib4_rules_init(net); + if (err 0) + goto fail; + return 0; + +fail: + kfree(net-ipv4.fib_table_hash); + return err; } static void __net_exit ip_fib_net_exit(struct net *net) -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/6] [IPV4]: Small style cleanup of the error path in rtm_to_ifaddr.
Remove error code assignment inside brackets on failure. The code looks better if the error is assigned before condition check. Also, the compiler treats this better. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/devinet.c | 21 - 1 files changed, 8 insertions(+), 13 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 21f71bf..9da4c68 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -492,39 +492,34 @@ static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh) struct ifaddrmsg *ifm; struct net_device *dev; struct in_device *in_dev; - int err = -EINVAL; + int err; err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); if (err 0) goto errout; ifm = nlmsg_data(nlh); - if (ifm-ifa_prefixlen 32 || tb[IFA_LOCAL] == NULL) { - err = -EINVAL; + err = -EINVAL; + if (ifm-ifa_prefixlen 32 || tb[IFA_LOCAL] == NULL) goto errout; - } dev = __dev_get_by_index(init_net, ifm-ifa_index); - if (dev == NULL) { - err = -ENODEV; + err = -ENODEV; + if (dev == NULL) goto errout; - } in_dev = __in_dev_get_rtnl(dev); - if (in_dev == NULL) { - err = -ENOBUFS; + err = -ENOBUFS; + if (in_dev == NULL) goto errout; - } ifa = inet_alloc_ifa(); - if (ifa == NULL) { + if (ifa == NULL) /* * A potential indev allocation can be left alive, it stays * assigned to its device and is destroy with it. */ - err = -ENOBUFS; goto errout; - } ipv4_devconf_setall(in_dev); in_dev_hold(in_dev); -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 5/6] [NETNS]: Add a namespace mark to fib_info.
This is required to make fib_info lookups namespace aware. In the other case initial namespace devices are marked as dead in the local routing table during other namespace stop. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- include/net/ip_fib.h |1 + net/ipv4/fib_semantics.c |8 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 1b2f008..cb0df37 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -69,6 +69,7 @@ struct fib_nh { struct fib_info { struct hlist_node fib_hash; struct hlist_node fib_lhash; + struct net *fib_net; int fib_treeref; atomic_tfib_clntref; int fib_dead; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 5beff2e..97cc494 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -687,6 +687,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) struct fib_info *fi = NULL; struct fib_info *ofi; int nhs = 1; + struct net *net = cfg-fc_nlinfo.nl_net; /* Fast check to catch the most weird cases */ if (fib_props[cfg-fc_type].scope cfg-fc_scope) @@ -727,6 +728,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) goto failure; fib_info_cnt++; + fi-fib_net = net; fi-fib_protocol = cfg-fc_protocol; fi-fib_flags = cfg-fc_flags; fi-fib_priority = cfg-fc_priority; @@ -798,8 +800,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (nhs != 1 || nh-nh_gw) goto err_inval; nh-nh_scope = RT_SCOPE_NOWHERE; - nh-nh_dev = dev_get_by_index(cfg-fc_nlinfo.nl_net, - fi-fib_nh-nh_oif); + nh-nh_dev = dev_get_by_index(net, fi-fib_nh-nh_oif); err = -ENODEV; if (nh-nh_dev == NULL) goto failure; @@ -813,8 +814,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (fi-fib_prefsrc) { if (cfg-fc_type != RTN_LOCAL || !cfg-fc_dst || fi-fib_prefsrc != cfg-fc_dst) - if (inet_addr_type(cfg-fc_nlinfo.nl_net, - fi-fib_prefsrc) != RTN_LOCAL) + if (inet_addr_type(net, fi-fib_prefsrc) != RTN_LOCAL) goto err_inval; } -- 1.5.3.rc5 -- To 
unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/6] [NETNS]: Process interface address manipulation routines in the namespace.
The namespace is available when required except rtm_to_ifaddr. Add namespace argument to it. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/devinet.c | 14 -- 1 files changed, 8 insertions(+), 6 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index e55c85e..6a6e92e 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -485,7 +485,7 @@ errout: return err; } -static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh) +static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh) { struct nlattr *tb[IFA_MAX+1]; struct in_ifaddr *ifa; @@ -503,7 +503,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh) if (ifm-ifa_prefixlen 32 || tb[IFA_LOCAL] == NULL) goto errout; - dev = __dev_get_by_index(init_net, ifm-ifa_index); + dev = __dev_get_by_index(net, ifm-ifa_index); err = -ENODEV; if (dev == NULL) goto errout; @@ -571,7 +571,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg if (net != init_net) return -EINVAL; - ifa = rtm_to_ifaddr(nlh); + ifa = rtm_to_ifaddr(net, nlh); if (IS_ERR(ifa)) return PTR_ERR(ifa); @@ -1189,7 +1189,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) s_ip_idx = ip_idx = cb-args[1]; idx = 0; - for_each_netdev(init_net, dev) { + for_each_netdev(net, dev) { if (idx s_idx) goto cont; if (idx s_idx) @@ -1223,7 +1223,9 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh, struct sk_buff *skb; u32 seq = nlh ? 
nlh-nlmsg_seq : 0; int err = -ENOBUFS; + struct net *net; + net = ifa-ifa_dev-dev-nd_net; skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL); if (skb == NULL) goto errout; @@ -1235,10 +1237,10 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh, kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, init_net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); + err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); errout: if (err 0) - rtnl_set_sk_err(init_net, RTNLGRP_IPV4_IFADDR, err); + rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err); } #ifdef CONFIG_SYSCTL -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 4/6] [IPV4]: fib_sync_down rework.
fib_sync_down can be called with an address and with a device. In reality it is called either with address OR with a device. The codepath inside is completely different, so lets separate it into two calls for these two cases. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- include/net/ip_fib.h |3 +- net/ipv4/fib_frontend.c |4 +- net/ipv4/fib_semantics.c | 104 +++-- 3 files changed, 57 insertions(+), 54 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 9daa60b..1b2f008 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -218,7 +218,8 @@ extern void fib_select_default(struct net *net, const struct flowi *flp, /* Exported by fib_semantics.c */ extern int ip_fib_check_default(__be32 gw, struct net_device *dev); -extern int fib_sync_down(__be32 local, struct net_device *dev, int force); +extern int fib_sync_down_dev(struct net_device *dev, int force); +extern int fib_sync_down_addr(__be32 local); extern int fib_sync_up(struct net_device *dev); extern __be32 __fib_res_prefsrc(struct fib_result *res); extern void fib_select_multipath(const struct flowi *flp, struct fib_result *res); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index d0507f4..d69ffa2 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -808,7 +808,7 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa) First of all, we scan fib_info list searching for stray nexthop entries, then ignite fib_flush. 
*/ - if (fib_sync_down(ifa-ifa_local, NULL, 0)) + if (fib_sync_down_addr(ifa-ifa_local)) fib_flush(dev-nd_net); } } @@ -898,7 +898,7 @@ static void nl_fib_lookup_exit(struct net *net) static void fib_disable_ip(struct net_device *dev, int force) { - if (fib_sync_down(0, dev, force)) + if (fib_sync_down_dev(dev, force)) fib_flush(dev-nd_net); rt_cache_flush(0); arp_ifdown(dev); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c791286..5beff2e 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1031,70 +1031,72 @@ nla_put_failure: referring to it. - device went down - we must shutdown all nexthops going via it. */ - -int fib_sync_down(__be32 local, struct net_device *dev, int force) +int fib_sync_down_addr(__be32 local) { int ret = 0; - int scope = RT_SCOPE_NOWHERE; - - if (force) - scope = -1; + unsigned int hash = fib_laddr_hashfn(local); + struct hlist_head *head = fib_info_laddrhash[hash]; + struct hlist_node *node; + struct fib_info *fi; - if (local fib_info_laddrhash) { - unsigned int hash = fib_laddr_hashfn(local); - struct hlist_head *head = fib_info_laddrhash[hash]; - struct hlist_node *node; - struct fib_info *fi; + if (fib_info_laddrhash == NULL || local == 0) + return 0; - hlist_for_each_entry(fi, node, head, fib_lhash) { - if (fi-fib_prefsrc == local) { - fi-fib_flags |= RTNH_F_DEAD; - ret++; - } + hlist_for_each_entry(fi, node, head, fib_lhash) { + if (fi-fib_prefsrc == local) { + fi-fib_flags |= RTNH_F_DEAD; + ret++; } } + return ret; +} - if (dev) { - struct fib_info *prev_fi = NULL; - unsigned int hash = fib_devindex_hashfn(dev-ifindex); - struct hlist_head *head = fib_info_devhash[hash]; - struct hlist_node *node; - struct fib_nh *nh; +int fib_sync_down_dev(struct net_device *dev, int force) +{ + int ret = 0; + int scope = RT_SCOPE_NOWHERE; + struct fib_info *prev_fi = NULL; + unsigned int hash = fib_devindex_hashfn(dev-ifindex); + struct hlist_head *head = fib_info_devhash[hash]; + struct hlist_node *node; 
+ struct fib_nh *nh; - hlist_for_each_entry(nh, node, head, nh_hash) { - struct fib_info *fi = nh-nh_parent; - int dead; + if (force) + scope = -1; - BUG_ON(!fi-fib_nhs); - if (nh-nh_dev != dev || fi == prev_fi) - continue; - prev_fi = fi; - dead = 0; - change_nexthops(fi) { - if (nh-nh_flagsRTNH_F_DEAD) - dead++; - else if (nh-nh_dev == dev -nh-nh_scope != scope
[PATCH 6/6] [NETNS]: Lookup in FIB semantic hashes taking into account the namespace.
The namespace is not available in the fib_sync_down_addr, add it as a parameter. Looking up a device by the pointer to it is OK. Looking up using a result from fib_trie/fib_hash table lookup is also safe. No need to fix that at all. So, just fix lookup by address and insertion to the hash table path. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- include/net/ip_fib.h |2 +- net/ipv4/fib_frontend.c |2 +- net/ipv4/fib_semantics.c |6 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index cb0df37..90d1175 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -220,7 +220,7 @@ extern void fib_select_default(struct net *net, const struct flowi *flp, /* Exported by fib_semantics.c */ extern int ip_fib_check_default(__be32 gw, struct net_device *dev); extern int fib_sync_down_dev(struct net_device *dev, int force); -extern int fib_sync_down_addr(__be32 local); +extern int fib_sync_down_addr(struct net *net, __be32 local); extern int fib_sync_up(struct net_device *dev); extern __be32 __fib_res_prefsrc(struct fib_result *res); extern void fib_select_multipath(const struct flowi *flp, struct fib_result *res); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index d69ffa2..86ff271 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -808,7 +808,7 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa) First of all, we scan fib_info list searching for stray nexthop entries, then ignite fib_flush. 
*/ - if (fib_sync_down_addr(ifa-ifa_local)) + if (fib_sync_down_addr(dev-nd_net, ifa-ifa_local)) fib_flush(dev-nd_net); } } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 97cc494..a13c847 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -229,6 +229,8 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi) head = fib_info_hash[hash]; hlist_for_each_entry(fi, node, head, fib_hash) { + if (fi-fib_net != nfi-fib_net) + continue; if (fi-fib_nhs != nfi-fib_nhs) continue; if (nfi-fib_protocol == fi-fib_protocol @@ -1031,7 +1033,7 @@ nla_put_failure: referring to it. - device went down - we must shutdown all nexthops going via it. */ -int fib_sync_down_addr(__be32 local) +int fib_sync_down_addr(struct net *net, __be32 local) { int ret = 0; unsigned int hash = fib_laddr_hashfn(local); @@ -1043,6 +1045,8 @@ int fib_sync_down_addr(__be32 local) return 0; hlist_for_each_entry(fi, node, head, fib_lhash) { + if (fi-fib_net != net) + continue; if (fi-fib_prefsrc == local) { fi-fib_flags |= RTNH_F_DEAD; ret++; -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Debugfs compile fix.
Debugfs does not compile without CONFIG_SYSFS in the net-2.6 tree. Move kobject_create_and_add under the appropriate ifdef. The fix looks correct at first glance, but maybe the dependency should be added to the Kconfig instead. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- fs/debugfs/inode.c |2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index d26e282..61cc937 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -432,9 +432,11 @@ static int __init debugfs_init(void) { int retval; +#ifdef CONFIG_SYSFS debug_kobj = kobject_create_and_add(debug, kernel_kobj); if (!debug_kobj) return -EINVAL; +#endif retval = register_filesystem(debug_fs_type); if (retval) -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 7/7 net-2.6.25] [NETNS]: Lookup in FIB semantic hashes taking into account the namespace.
The namespace is not available in the fib_sync_down_addr, add it as a parameter. Looking up a device by the pointer to it is OK. Looking up using a result from fib_trie/fib_hash table lookup is also safe. No need to fix that at all. So, just fix lookup by address and insertion to the hash table path. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- include/net/ip_fib.h |2 +- net/ipv4/fib_frontend.c |2 +- net/ipv4/fib_semantics.c |6 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index cb0df37..90d1175 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -220,7 +220,7 @@ extern void fib_select_default(struct net *net, const struct flowi *flp, /* Exported by fib_semantics.c */ extern int ip_fib_check_default(__be32 gw, struct net_device *dev); extern int fib_sync_down_dev(struct net_device *dev, int force); -extern int fib_sync_down_addr(__be32 local); +extern int fib_sync_down_addr(struct net *net, __be32 local); extern int fib_sync_up(struct net_device *dev); extern __be32 __fib_res_prefsrc(struct fib_result *res); extern void fib_select_multipath(const struct flowi *flp, struct fib_result *res); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index d69ffa2..86ff271 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -808,7 +808,7 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa) First of all, we scan fib_info list searching for stray nexthop entries, then ignite fib_flush. 
*/ - if (fib_sync_down_addr(ifa-ifa_local)) + if (fib_sync_down_addr(dev-nd_net, ifa-ifa_local)) fib_flush(dev-nd_net); } } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 97cc494..a13c847 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -229,6 +229,8 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi) head = fib_info_hash[hash]; hlist_for_each_entry(fi, node, head, fib_hash) { + if (fi-fib_net != nfi-fib_net) + continue; if (fi-fib_nhs != nfi-fib_nhs) continue; if (nfi-fib_protocol == fi-fib_protocol @@ -1031,7 +1033,7 @@ nla_put_failure: referring to it. - device went down - we must shutdown all nexthops going via it. */ -int fib_sync_down_addr(__be32 local) +int fib_sync_down_addr(struct net *net, __be32 local) { int ret = 0; unsigned int hash = fib_laddr_hashfn(local); @@ -1043,6 +1045,8 @@ int fib_sync_down_addr(__be32 local) return 0; hlist_for_each_entry(fi, node, head, fib_lhash) { + if (fi-fib_net != net) + continue; if (fi-fib_prefsrc == local) { fi-fib_flags |= RTNH_F_DEAD; ret++; -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/7 net-2.6.25] [IPV4]: Fix memory leak on error path during FIB initialization.
net-ipv4.fib_table_hash is not freed when fib4_rules_init failed. The problem has been introduced by the following commit. commit c8050bf6d84785a7edd2e81591e8f833231477e8 Author: Denis V. Lunev [EMAIL PROTECTED] Date: Thu Jan 10 03:28:24 2008 -0800 Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/fib_frontend.c | 10 +- 1 files changed, 9 insertions(+), 1 deletions(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index d282618..d0507f4 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -975,6 +975,7 @@ static struct notifier_block fib_netdev_notifier = { static int __net_init ip_fib_net_init(struct net *net) { + int err; unsigned int i; net-ipv4.fib_table_hash = kzalloc( @@ -985,7 +986,14 @@ static int __net_init ip_fib_net_init(struct net *net) for (i = 0; i FIB_TABLE_HASHSZ; i++) INIT_HLIST_HEAD(net-ipv4.fib_table_hash[i]); - return fib4_rules_init(net); + err = fib4_rules_init(net); + if (err 0) + goto fail; + return 0; + +fail: + kfree(net-ipv4.fib_table_hash); + return err; } static void __net_exit ip_fib_net_exit(struct net *net) -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/7 net-2.6.25] [IPV4]: Small style cleanup of the error path in rtm_to_ifaddr.
Remove error code assignment inside brackets on failure. The code looks better if the error is assigned before condition check. Also, the compiler treats this better. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/devinet.c | 21 - 1 files changed, 8 insertions(+), 13 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 21f71bf..9da4c68 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -492,39 +492,34 @@ static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh) struct ifaddrmsg *ifm; struct net_device *dev; struct in_device *in_dev; - int err = -EINVAL; + int err; err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); if (err 0) goto errout; ifm = nlmsg_data(nlh); - if (ifm-ifa_prefixlen 32 || tb[IFA_LOCAL] == NULL) { - err = -EINVAL; + err = -EINVAL; + if (ifm-ifa_prefixlen 32 || tb[IFA_LOCAL] == NULL) goto errout; - } dev = __dev_get_by_index(init_net, ifm-ifa_index); - if (dev == NULL) { - err = -ENODEV; + err = -ENODEV; + if (dev == NULL) goto errout; - } in_dev = __in_dev_get_rtnl(dev); - if (in_dev == NULL) { - err = -ENOBUFS; + err = -ENOBUFS; + if (in_dev == NULL) goto errout; - } ifa = inet_alloc_ifa(); - if (ifa == NULL) { + if (ifa == NULL) /* * A potential indev allocation can be left alive, it stays * assigned to its device and is destroy with it. */ - err = -ENOBUFS; goto errout; - } ipv4_devconf_setall(in_dev); in_dev_hold(in_dev); -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 6/7 net-2.6.25] [NETNS]: Add a namespace mark to fib_info.
This is required to make fib_info lookups namespace aware. In the other case initial namespace devices are marked as dead in the local routing table during other namespace stop. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- include/net/ip_fib.h |1 + net/ipv4/fib_semantics.c |8 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 1b2f008..cb0df37 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -69,6 +69,7 @@ struct fib_nh { struct fib_info { struct hlist_node fib_hash; struct hlist_node fib_lhash; + struct net *fib_net; int fib_treeref; atomic_tfib_clntref; int fib_dead; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 5beff2e..97cc494 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -687,6 +687,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) struct fib_info *fi = NULL; struct fib_info *ofi; int nhs = 1; + struct net *net = cfg-fc_nlinfo.nl_net; /* Fast check to catch the most weird cases */ if (fib_props[cfg-fc_type].scope cfg-fc_scope) @@ -727,6 +728,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) goto failure; fib_info_cnt++; + fi-fib_net = net; fi-fib_protocol = cfg-fc_protocol; fi-fib_flags = cfg-fc_flags; fi-fib_priority = cfg-fc_priority; @@ -798,8 +800,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (nhs != 1 || nh-nh_gw) goto err_inval; nh-nh_scope = RT_SCOPE_NOWHERE; - nh-nh_dev = dev_get_by_index(cfg-fc_nlinfo.nl_net, - fi-fib_nh-nh_oif); + nh-nh_dev = dev_get_by_index(net, fi-fib_nh-nh_oif); err = -ENODEV; if (nh-nh_dev == NULL) goto failure; @@ -813,8 +814,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (fi-fib_prefsrc) { if (cfg-fc_type != RTN_LOCAL || !cfg-fc_dst || fi-fib_prefsrc != cfg-fc_dst) - if (inet_addr_type(cfg-fc_nlinfo.nl_net, - fi-fib_prefsrc) != RTN_LOCAL) + if (inet_addr_type(net, fi-fib_prefsrc) != RTN_LOCAL) goto err_inval; } -- 1.5.3.rc5 -- To 
unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 4/7 net-2.6.25] [NETNS]: Process interface address manipulation routines in the namespace.
The namespace is available when required except rtm_to_ifaddr. Add namespace argument to it. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/devinet.c | 14 -- 1 files changed, 8 insertions(+), 6 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index e55c85e..6a6e92e 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -485,7 +485,7 @@ errout: return err; } -static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh) +static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh) { struct nlattr *tb[IFA_MAX+1]; struct in_ifaddr *ifa; @@ -503,7 +503,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh) if (ifm-ifa_prefixlen 32 || tb[IFA_LOCAL] == NULL) goto errout; - dev = __dev_get_by_index(init_net, ifm-ifa_index); + dev = __dev_get_by_index(net, ifm-ifa_index); err = -ENODEV; if (dev == NULL) goto errout; @@ -571,7 +571,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg if (net != init_net) return -EINVAL; - ifa = rtm_to_ifaddr(nlh); + ifa = rtm_to_ifaddr(net, nlh); if (IS_ERR(ifa)) return PTR_ERR(ifa); @@ -1189,7 +1189,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) s_ip_idx = ip_idx = cb-args[1]; idx = 0; - for_each_netdev(init_net, dev) { + for_each_netdev(net, dev) { if (idx s_idx) goto cont; if (idx s_idx) @@ -1223,7 +1223,9 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh, struct sk_buff *skb; u32 seq = nlh ? 
nlh-nlmsg_seq : 0; int err = -ENOBUFS; + struct net *net; + net = ifa-ifa_dev-dev-nd_net; skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL); if (skb == NULL) goto errout; @@ -1235,10 +1237,10 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh, kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, init_net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); + err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); errout: if (err 0) - rtnl_set_sk_err(init_net, RTNLGRP_IPV4_IFADDR, err); + rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err); } #ifdef CONFIG_SYSCTL -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 5/7 net-2.6.25] [IPV4]: fib_sync_down rework.
fib_sync_down can be called with an address and with a device. In reality it is called either with address OR with a device. The codepath inside is completely different, so lets separate it into two calls for these two cases. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- include/net/ip_fib.h |3 +- net/ipv4/fib_frontend.c |4 +- net/ipv4/fib_semantics.c | 104 +++-- 3 files changed, 57 insertions(+), 54 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 9daa60b..1b2f008 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -218,7 +218,8 @@ extern void fib_select_default(struct net *net, const struct flowi *flp, /* Exported by fib_semantics.c */ extern int ip_fib_check_default(__be32 gw, struct net_device *dev); -extern int fib_sync_down(__be32 local, struct net_device *dev, int force); +extern int fib_sync_down_dev(struct net_device *dev, int force); +extern int fib_sync_down_addr(__be32 local); extern int fib_sync_up(struct net_device *dev); extern __be32 __fib_res_prefsrc(struct fib_result *res); extern void fib_select_multipath(const struct flowi *flp, struct fib_result *res); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index d0507f4..d69ffa2 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -808,7 +808,7 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa) First of all, we scan fib_info list searching for stray nexthop entries, then ignite fib_flush. 
*/ - if (fib_sync_down(ifa-ifa_local, NULL, 0)) + if (fib_sync_down_addr(ifa-ifa_local)) fib_flush(dev-nd_net); } } @@ -898,7 +898,7 @@ static void nl_fib_lookup_exit(struct net *net) static void fib_disable_ip(struct net_device *dev, int force) { - if (fib_sync_down(0, dev, force)) + if (fib_sync_down_dev(dev, force)) fib_flush(dev-nd_net); rt_cache_flush(0); arp_ifdown(dev); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c791286..5beff2e 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1031,70 +1031,72 @@ nla_put_failure: referring to it. - device went down - we must shutdown all nexthops going via it. */ - -int fib_sync_down(__be32 local, struct net_device *dev, int force) +int fib_sync_down_addr(__be32 local) { int ret = 0; - int scope = RT_SCOPE_NOWHERE; - - if (force) - scope = -1; + unsigned int hash = fib_laddr_hashfn(local); + struct hlist_head *head = fib_info_laddrhash[hash]; + struct hlist_node *node; + struct fib_info *fi; - if (local fib_info_laddrhash) { - unsigned int hash = fib_laddr_hashfn(local); - struct hlist_head *head = fib_info_laddrhash[hash]; - struct hlist_node *node; - struct fib_info *fi; + if (fib_info_laddrhash == NULL || local == 0) + return 0; - hlist_for_each_entry(fi, node, head, fib_lhash) { - if (fi-fib_prefsrc == local) { - fi-fib_flags |= RTNH_F_DEAD; - ret++; - } + hlist_for_each_entry(fi, node, head, fib_lhash) { + if (fi-fib_prefsrc == local) { + fi-fib_flags |= RTNH_F_DEAD; + ret++; } } + return ret; +} - if (dev) { - struct fib_info *prev_fi = NULL; - unsigned int hash = fib_devindex_hashfn(dev-ifindex); - struct hlist_head *head = fib_info_devhash[hash]; - struct hlist_node *node; - struct fib_nh *nh; +int fib_sync_down_dev(struct net_device *dev, int force) +{ + int ret = 0; + int scope = RT_SCOPE_NOWHERE; + struct fib_info *prev_fi = NULL; + unsigned int hash = fib_devindex_hashfn(dev-ifindex); + struct hlist_head *head = fib_info_devhash[hash]; + struct hlist_node *node; 
+ struct fib_nh *nh; - hlist_for_each_entry(nh, node, head, nh_hash) { - struct fib_info *fi = nh-nh_parent; - int dead; + if (force) + scope = -1; - BUG_ON(!fi-fib_nhs); - if (nh-nh_dev != dev || fi == prev_fi) - continue; - prev_fi = fi; - dead = 0; - change_nexthops(fi) { - if (nh-nh_flagsRTNH_F_DEAD) - dead++; - else if (nh-nh_dev == dev -nh-nh_scope != scope
[PATCH 3/7 net-2.6.25] [IPV4]: Prohibit assignment of 0.0.0.0 as interface address.
I could hardly imagine why somebody needs to assign 0.0.0.0 as an interface address or interface destination address. The kernel will behave in a strange way in several places if this is possible, as ifa_local != 0 is treated as the marker of the initialized/non-initialized state of the ifa. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/devinet.c | 12 1 files changed, 12 insertions(+), 0 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 9da4c68..e55c85e 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -534,7 +534,13 @@ static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh) ifa-ifa_dev = in_dev; ifa-ifa_local = nla_get_be32(tb[IFA_LOCAL]); + err = -EINVAL; + if (ifa-ifa_local == htonl(INADDR_ANY)) + goto fail_free; + ifa-ifa_address = nla_get_be32(tb[IFA_ADDRESS]); + if (ifa-ifa_address == htonl(INADDR_ANY)) + goto fail_free; if (tb[IFA_BROADCAST]) ifa-ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]); @@ -549,6 +555,8 @@ static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh) return ifa; +fail_free: + inet_free_ifa(ifa); errout: return ERR_PTR(err); } @@ -736,6 +744,8 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) ret = -EINVAL; if (inet_abc_len(sin-sin_addr.s_addr) 0) break; + if (sin-sin_addr.s_addr == INADDR_ANY) + break; if (!ifa) { ret = -ENOBUFS; @@ -786,6 +796,8 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) ret = -EINVAL; if (inet_abc_len(sin-sin_addr.s_addr) 0) break; + if (sin-sin_addr.s_addr == INADDR_ANY) + break; ret = 0; inet_del_ifa(in_dev, ifap, 0); ifa-ifa_address = sin-sin_addr.s_addr; -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/7 net-2.6.25] [IPV4]: Prohibit assignment of 0.0.0.0 as interface address.
Daniel Lezcano wrote: Denis V. Lunev wrote: I could hardly imagine why somebody needs to assign 0.0.0.0 as an interface address or interface destination address. The kernel will behave in a strange way in several places if this is possible, as ifa_local != 0 is considered as initialized/non-initialized state of the ifa. AFAICS, we should be able to set an interface address to 0.0.0.0, in order to remove an IP address from an interface and keep this one up. I see two trivial cases: * remove the ipv4 on an interface but continue to use it through ipv6 * move ipv4 address from the interface to an attached bridge For this case there is an IOCTL/netlink operation to remove an IP address. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/7 net-2.6.25] [IPV4]: Prohibit assignment of 0.0.0.0 as interface address.
Daniel Lezcano wrote: Denis V. Lunev wrote: Daniel Lezcano wrote: Denis V. Lunev wrote: I could hardly imagine why somebody needs to assign 0.0.0.0 as an interface address or interface destination address. The kernel will behave in a strange way in several places if this is possible, as ifa_local != 0 is considered as initialized/non-initialized state of the ifa. AFAICS, we should be able to set an interface address to 0.0.0.0, in order to remove an IP address from an interface and keep this one up. I see two trivial cases: * remove the ipv4 on an interface but continue to use it through ipv6 * move ipv4 address from the interface to an attached bridge For this case there is an IOCTL/netlink operation to remove an IP address. And I forgot to mention the general broadcast. This is needed for the dhcp protocol. If you are not able to set your interface to 0.0.0.0, you will not be able to send a 255.255.255.255 broadcast message to obtain your IP address. OK. Dave, please disregard this patch. I suspect that the others in the set do not intersect with this one. To summarize the discussion: there is only one reason for this assignment: the old IOCTL interface does not have a way to remove an IP address except this, though netlink has a method for it; that's why I am a little bit confused :) This is handled in __inet_insert_ifa: the ifa is just removed there and, correctly, an ifa with the 0.0.0.0 address can't exist in the kernel. Sorry :) -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/12 net-2.6.25] [NETNS]: Routing namespacing on IP output path.
This set introduces namespacing in the IP output path. The namespace is added to all routing API functions except the ones with a valid socket. This is very intrusive. The routing cache is virtualized as part of this effort, though the hash function is not tuned to use the namespace id. This is not required to work in the initial namespace. ICMP replies now also use the correct namespace. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 8/12 net-2.6.25] [NETNS]: Add namespace parameter to ip_route_output_flow.
Needed to propagate it down to the __ip_route_output_key. Signed_off_by: Denis V. Lunev [EMAIL PROTECTED] --- drivers/infiniband/hw/cxgb3/iwch_cm.c |2 +- include/net/route.h |6 +++--- net/dccp/ipv4.c |2 +- net/ipv4/af_inet.c|2 +- net/ipv4/inet_connection_sock.c |2 +- net/ipv4/ip_output.c |2 +- net/ipv4/raw.c|2 +- net/ipv4/route.c |7 --- net/ipv4/udp.c|2 +- 9 files changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 20ba372..ff3dee4 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -332,7 +332,7 @@ static struct rtable *find_route(struct t3cdev *dev, __be32 local_ip, } }; - if (ip_route_output_flow(rt, fl, NULL, 0)) + if (ip_route_output_flow(init_net, rt, fl, NULL, 0)) return NULL; return rt; } diff --git a/include/net/route.h b/include/net/route.h index 3e3b14e..6b970d7 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -112,7 +112,7 @@ extern void ip_rt_redirect(__be32 old_gw, __be32 dst, __be32 new_gw, extern voidrt_cache_flush(int how); extern int __ip_route_output_key(struct net *, struct rtable **, const struct flowi *flp); extern int ip_route_output_key(struct rtable **, struct flowi *flp); -extern int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags); +extern int ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags); extern int ip_route_input(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin); extern unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu); extern voidip_rt_send_redirect(struct sk_buff *skb); @@ -167,7 +167,7 @@ static inline int ip_route_connect(struct rtable **rp, __be32 dst, *rp = NULL; } security_sk_classify_flow(sk, fl); - return ip_route_output_flow(rp, fl, sk, flags); + return ip_route_output_flow(init_net, rp, fl, sk, flags); } static inline int 
ip_route_newports(struct rtable **rp, u8 protocol, @@ -184,7 +184,7 @@ static inline int ip_route_newports(struct rtable **rp, u8 protocol, ip_rt_put(*rp); *rp = NULL; security_sk_classify_flow(sk, fl); - return ip_route_output_flow(rp, fl, sk, 0); + return ip_route_output_flow(init_net, rp, fl, sk, 0); } return 0; } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index f450df2..9e38b0d 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -469,7 +469,7 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk, }; security_skb_classify_flow(skb, fl); - if (ip_route_output_flow(rt, fl, sk, 0)) { + if (ip_route_output_flow(init_net, rt, fl, sk, 0)) { IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); return NULL; } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index bcf8c8a..09ca529 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1113,7 +1113,7 @@ int inet_sk_rebuild_header(struct sock *sk) }; security_sk_classify_flow(sk, fl); - err = ip_route_output_flow(rt, fl, sk, 0); + err = ip_route_output_flow(init_net, rt, fl, sk, 0); } if (!err) sk_setup_caps(sk, rt-u.dst); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 1c2a32f..7801cce 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -333,7 +333,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, .dport = ireq-rmt_port } } }; security_req_classify_flow(req, fl); - if (ip_route_output_flow(rt, fl, sk, 0)) { + if (ip_route_output_flow(init_net, rt, fl, sk, 0)) { IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); return NULL; } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e57de0f..dc56e40 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -350,7 +350,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) * itself out. 
*/ security_sk_classify_flow(sk, fl); - if (ip_route_output_flow(rt, fl, sk, 0)) + if (ip_route_output_flow(init_net, rt, fl, sk, 0)) goto no_route; } sk_setup_caps(sk, rt-u.dst); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 91a5218..85c0869
[PATCH 3/12 net-2.6.25] [NETNS]: Add netns parameter to fib_select_default.
Currently fib_select_default calls fib_get_table() with the init_net. Prepare it to provide a correct namespace to lookup default route. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- include/net/ip_fib.h|3 ++- net/ipv4/fib_frontend.c |5 +++-- net/ipv4/route.c|2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 39f944a..9daa60b 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -213,7 +213,8 @@ extern const struct nla_policy rtm_ipv4_policy[]; extern voidip_fib_init(void); extern int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, __be32 *spec_dst, u32 *itag); -extern void fib_select_default(const struct flowi *flp, struct fib_result *res); +extern void fib_select_default(struct net *net, const struct flowi *flp, + struct fib_result *res); /* Exported by fib_semantics.c */ extern int ip_fib_check_default(__be32 gw, struct net_device *dev); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 13bf01d..7e3e732 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -116,7 +116,8 @@ struct fib_table *fib_get_table(struct net *net, u32 id) } #endif /* CONFIG_IP_MULTIPLE_TABLES */ -void fib_select_default(const struct flowi *flp, struct fib_result *res) +void fib_select_default(struct net *net, + const struct flowi *flp, struct fib_result *res) { struct fib_table *tb; int table = RT_TABLE_MAIN; @@ -125,7 +126,7 @@ void fib_select_default(const struct flowi *flp, struct fib_result *res) return; table = res-r-table; #endif - tb = fib_get_table(init_net, table); + tb = fib_get_table(net, table); if (FIB_RES_GW(*res) FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) tb-tb_select_default(tb, flp, res); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 27e0f81..4313255 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2419,7 +2419,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) else 
#endif if (!res.prefixlen res.type == RTN_UNICAST !fl.oif) - fib_select_default(fl, res); + fib_select_default(init_net, fl, res); if (!fl.fl4_src) fl.fl4_src = FIB_RES_PREFSRC(res); -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 6/12 net-2.6.25] [NETNS]: Add namespace parameter to ip_route_output_slow.
This function needs a net namespace to lookup devices, fib tables, etc. in, so pass it there. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/route.c | 22 +++-- 1 files changed, 11 insertions(+), 10 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 674575b..c1f9950 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2248,7 +2248,8 @@ static inline int ip_mkroute_output(struct rtable **rp, * Major route resolver routine. */ -static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) +static int ip_route_output_slow(struct net *net, struct rtable **rp, + const struct flowi *oldflp) { u32 tos = RT_FL_TOS(oldflp); struct flowi fl = { .nl_u = { .ip4_u = @@ -2260,7 +2261,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) RT_SCOPE_UNIVERSE), } }, .mark = oldflp-mark, - .iif = init_net.loopback_dev-ifindex, + .iif = net-loopback_dev-ifindex, .oif = oldflp-oif }; struct fib_result res; unsigned flags = 0; @@ -2282,7 +2283,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) goto out; /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ - dev_out = ip_dev_find(init_net, oldflp-fl4_src); + dev_out = ip_dev_find(net, oldflp-fl4_src); if (dev_out == NULL) goto out; @@ -2322,7 +2323,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) if (oldflp-oif) { - dev_out = dev_get_by_index(init_net, oldflp-oif); + dev_out = dev_get_by_index(net, oldflp-oif); err = -ENODEV; if (dev_out == NULL) goto out; @@ -2356,15 +2357,15 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK); if (dev_out) dev_put(dev_out); - dev_out = init_net.loopback_dev; + dev_out = net-loopback_dev; dev_hold(dev_out); - fl.oif = init_net.loopback_dev-ifindex; + fl.oif = net-loopback_dev-ifindex; res.type = RTN_LOCAL; flags |= RTCF_LOCAL; goto make_route; } - if 
(fib_lookup(init_net, fl, res)) { + if (fib_lookup(net, fl, res)) { res.fi = NULL; if (oldflp-oif) { /* Apparently, routing tables are wrong. Assume, @@ -2403,7 +2404,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) fl.fl4_src = fl.fl4_dst; if (dev_out) dev_put(dev_out); - dev_out = init_net.loopback_dev; + dev_out = net-loopback_dev; dev_hold(dev_out); fl.oif = dev_out-ifindex; if (res.fi) @@ -2419,7 +2420,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) else #endif if (!res.prefixlen res.type == RTN_UNICAST !fl.oif) - fib_select_default(init_net, fl, res); + fib_select_default(net, fl, res); if (!fl.fl4_src) fl.fl4_src = FIB_RES_PREFSRC(res); @@ -2469,7 +2469,7 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp) } rcu_read_unlock_bh(); - return ip_route_output_slow(rp, flp); + return ip_route_output_slow(init_net, rp, flp); } EXPORT_SYMBOL_GPL(__ip_route_output_key); -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 4/12 net-2.6.25] [NETNS]: Add namespace parameter to ip_dev_find.
ip_dev_find() needs a namespace to pass to fib_get_table(), so add an argument. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- drivers/infiniband/core/addr.c |4 ++-- drivers/infiniband/core/cma.c |2 +- include/linux/inetdevice.h |2 +- net/ipv4/fib_frontend.c|4 ++-- net/ipv4/igmp.c|2 +- net/ipv4/ip_sockglue.c |2 +- net/ipv4/ipmr.c|2 +- net/ipv4/route.c |6 +++--- 8 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 0802b79..963177e 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -110,7 +110,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) __be32 ip = ((struct sockaddr_in *) addr)-sin_addr.s_addr; int ret; - dev = ip_dev_find(ip); + dev = ip_dev_find(init_net, ip); if (!dev) return -EADDRNOTAVAIL; @@ -261,7 +261,7 @@ static int addr_resolve_local(struct sockaddr_in *src_in, __be32 dst_ip = dst_in-sin_addr.s_addr; int ret; - dev = ip_dev_find(dst_ip); + dev = ip_dev_find(init_net, dst_ip); if (!dev) return -EADDRNOTAVAIL; diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index b37045c..ef9efb3 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1280,7 +1280,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, atomic_inc(conn_id-dev_remove); conn_id-state = CMA_CONNECT; - dev = ip_dev_find(iw_event-local_addr.sin_addr.s_addr); + dev = ip_dev_find(init_net, iw_event-local_addr.sin_addr.s_addr); if (!dev) { ret = -EADDRNOTAVAIL; cma_enable_remove(conn_id); diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index e74a2ee..8d9eaae 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -129,7 +129,7 @@ struct in_ifaddr extern int register_inetaddr_notifier(struct notifier_block *nb); extern int unregister_inetaddr_notifier(struct notifier_block *nb); -extern struct net_device *ip_dev_find(__be32 addr); +extern struct 
net_device *ip_dev_find(struct net *net, __be32 addr); extern int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b); extern int devinet_ioctl(unsigned int cmd, void __user *); extern voiddevinet_init(void); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 7e3e732..d282618 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -153,7 +153,7 @@ static void fib_flush(struct net *net) * Find the first device with a given source address. */ -struct net_device * ip_dev_find(__be32 addr) +struct net_device * ip_dev_find(struct net *net, __be32 addr) { struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; struct fib_result res; @@ -164,7 +164,7 @@ struct net_device * ip_dev_find(__be32 addr) res.r = NULL; #endif - local_table = fib_get_table(init_net, RT_TABLE_LOCAL); + local_table = fib_get_table(net, RT_TABLE_LOCAL); if (!local_table || local_table-tb_lookup(local_table, fl, res)) return NULL; if (res.type != RTN_LOCAL) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 928bc32..1f5314c 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1395,7 +1395,7 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr) return idev; } if (imr-imr_address.s_addr) { - dev = ip_dev_find(imr-imr_address.s_addr); + dev = ip_dev_find(init_net, imr-imr_address.s_addr); if (!dev) return NULL; dev_put(dev); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 82817e5..754b0a5 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -594,7 +594,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = 0; break; } - dev = ip_dev_find(mreq.imr_address.s_addr); + dev = ip_dev_find(init_net, mreq.imr_address.s_addr); if (dev) { mreq.imr_ifindex = dev-ifindex; dev_put(dev); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 4198615..2212717 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -423,7 +423,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock) return -ENOBUFS; break; case 0: 
- dev = ip_dev_find(vifc-vifc_lcl_addr.s_addr); + dev = ip_dev_find(init_net, vifc-vifc_lcl_addr.s_addr); if (!dev) return
[PATCH 1/12 net-2.6.25] [IPV4]: Declarations cleanup in ip_fib.h.
Two small issues fixed: - fib_select_multipath is exported from fib_semantics.c rather than from fib_frontend.c. So, move the declaration below appropriate comment. - struct rt_entry declaration is not used. Drop it. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- include/net/ip_fib.h |4 +--- 1 files changed, 1 insertions(+), 3 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index a859124..be70b33 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -222,15 +222,13 @@ extern const struct nla_policy rtm_ipv4_policy[]; extern voidip_fib_init(void); extern int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, __be32 *spec_dst, u32 *itag); -extern void fib_select_multipath(const struct flowi *flp, struct fib_result *res); - -struct rtentry; /* Exported by fib_semantics.c */ extern int ip_fib_check_default(__be32 gw, struct net_device *dev); extern int fib_sync_down(__be32 local, struct net_device *dev, int force); extern int fib_sync_up(struct net_device *dev); extern __be32 __fib_res_prefsrc(struct fib_result *res); +extern void fib_select_multipath(const struct flowi *flp, struct fib_result *res); /* Exported by fib_{hash|trie}.c */ extern void fib_hash_init(void); -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 5/12 net-2.6.25] [NETNS]: Re-export init_net via EXPORT_SYMBOL.
init_net is added as a parameter to a lot of old API calls, e.g. ip_dev_find. These calls were exported as EXPORT_SYMBOL. So, export init_net as EXPORT_SYMBOL to keep networking API consistent. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/core/net_namespace.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 8023208..26e941d 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -18,7 +18,7 @@ static DEFINE_MUTEX(net_mutex); LIST_HEAD(net_namespace_list); struct net init_net; -EXPORT_SYMBOL_GPL(init_net); +EXPORT_SYMBOL(init_net); /* * setup_net runs the initializers for the network namespace object. -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/12 net-2.6.25] [IPV4]: Consolidate fib_select_default.
The difference in the implementation of the fib_select_default when CONFIG_IP_MULTIPLE_TABLES is (not) defined looks negligible. Consolidate it and place into fib_frontend.c. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- include/net/ip_fib.h| 10 +- net/ipv4/fib_frontend.c | 14 ++ net/ipv4/fib_rules.c| 10 -- 3 files changed, 15 insertions(+), 19 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index be70b33..39f944a 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -193,14 +193,6 @@ static inline int fib_lookup(struct net *net, const struct flowi *flp, return -ENETUNREACH; } -static inline void fib_select_default(const struct flowi *flp, - struct fib_result *res) -{ - struct fib_table *table = fib_get_table(init_net, RT_TABLE_MAIN); - if (FIB_RES_GW(*res) FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) - table-tb_select_default(table, flp, res); -} - #else /* CONFIG_IP_MULTIPLE_TABLES */ extern int __net_init fib4_rules_init(struct net *net); extern void __net_exit fib4_rules_exit(struct net *net); @@ -213,7 +205,6 @@ extern int fib_lookup(struct net *n, struct flowi *flp, struct fib_result *res); extern struct fib_table *fib_new_table(struct net *net, u32 id); extern struct fib_table *fib_get_table(struct net *net, u32 id); -extern void fib_select_default(const struct flowi *flp, struct fib_result *res); #endif /* CONFIG_IP_MULTIPLE_TABLES */ @@ -222,6 +213,7 @@ extern const struct nla_policy rtm_ipv4_policy[]; extern voidip_fib_init(void); extern int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, __be32 *spec_dst, u32 *itag); +extern void fib_select_default(const struct flowi *flp, struct fib_result *res); /* Exported by fib_semantics.c */ extern int ip_fib_check_default(__be32 gw, struct net_device *dev); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 6761639..13bf01d 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -116,6 +116,20 @@ struct 
fib_table *fib_get_table(struct net *net, u32 id) } #endif /* CONFIG_IP_MULTIPLE_TABLES */ +void fib_select_default(const struct flowi *flp, struct fib_result *res) +{ + struct fib_table *tb; + int table = RT_TABLE_MAIN; +#ifdef CONFIG_IP_MULTIPLE_TABLES + if (res-r == NULL || res-r-action != FR_ACT_TO_TBL) + return; + table = res-r-table; +#endif + tb = fib_get_table(init_net, table); + if (FIB_RES_GW(*res) FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) + tb-tb_select_default(tb, flp, res); +} + static void fib_flush(struct net *net) { int flushed = 0; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 1effb4a..19274d0 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -102,16 +102,6 @@ errout: } -void fib_select_default(const struct flowi *flp, struct fib_result *res) -{ - if (res-r res-r-action == FR_ACT_TO_TBL - FIB_RES_GW(*res) FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) { - struct fib_table *tb; - if ((tb = fib_get_table(init_net, res-r-table)) != NULL) - tb-tb_select_default(tb, flp, res); - } -} - static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) { struct fib4_rule *r = (struct fib4_rule *) rule; -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 7/12 net-2.6.25] [NETNS]: Add namespace parameter to __ip_route_output_key.
This is only required to propagate it down to the ip_route_output_slow. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- include/net/route.h |4 ++-- net/ipv4/icmp.c |4 ++-- net/ipv4/route.c|7 --- net/ipv4/xfrm4_policy.c |2 +- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/include/net/route.h b/include/net/route.h index 5847e6f..3e3b14e 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -110,7 +110,7 @@ extern int ip_rt_init(void); extern voidip_rt_redirect(__be32 old_gw, __be32 dst, __be32 new_gw, __be32 src, struct net_device *dev); extern voidrt_cache_flush(int how); -extern int __ip_route_output_key(struct rtable **, const struct flowi *flp); +extern int __ip_route_output_key(struct net *, struct rtable **, const struct flowi *flp); extern int ip_route_output_key(struct rtable **, struct flowi *flp); extern int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags); extern int ip_route_input(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin); @@ -158,7 +158,7 @@ static inline int ip_route_connect(struct rtable **rp, __be32 dst, int err; if (!dst || !src) { - err = __ip_route_output_key(rp, fl); + err = __ip_route_output_key(init_net, rp, fl); if (err) return err; fl.fl4_dst = (*rp)-rt_dst; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 7ed8c50..21422bf 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -569,7 +569,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) struct rtable *rt2; security_skb_classify_flow(skb_in, fl); - if (__ip_route_output_key(rt, fl)) + if (__ip_route_output_key(init_net, rt, fl)) goto out_unlock; /* No need to clone since we're just using its address. 
*/ @@ -592,7 +592,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) goto out_unlock; if (inet_addr_type(init_net, fl.fl4_src) == RTN_LOCAL) - err = __ip_route_output_key(rt2, fl); + err = __ip_route_output_key(init_net, rt2, fl); else { struct flowi fl2 = {}; struct dst_entry *odst; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c1f9950..cb035cc 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2442,7 +2442,8 @@ make_route: out: return err; } -int __ip_route_output_key(struct rtable **rp, const struct flowi *flp) +int __ip_route_output_key(struct net *net, struct rtable **rp, + const struct flowi *flp) { unsigned hash; struct rtable *rth; @@ -2469,7 +2470,7 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp) } rcu_read_unlock_bh(); - return ip_route_output_slow(init_net, rp, flp); + return ip_route_output_slow(net, rp, flp); } EXPORT_SYMBOL_GPL(__ip_route_output_key); @@ -2535,7 +2536,7 @@ int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, { int err; - if ((err = __ip_route_output_key(rp, flp)) != 0) + if ((err = __ip_route_output_key(init_net, rp, flp)) != 0) return err; if (flp-proto) { diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index f04516c..3783e3e 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -36,7 +36,7 @@ static struct dst_entry *xfrm4_dst_lookup(int tos, xfrm_address_t *saddr, if (saddr) fl.fl4_src = saddr-a4; - err = __ip_route_output_key(rt, fl); + err = __ip_route_output_key(init_net, rt, fl); dst = rt-u.dst; if (err) dst = ERR_PTR(err); -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 9/12 net-2.6.25] [NETNS]: Add namespace parameter to ip_route_output_key.
Needed to propagate it down to the ip_route_output_flow. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- drivers/infiniband/core/addr.c |4 ++-- drivers/net/bonding/bond_main.c |2 +- include/net/route.h |2 +- net/atm/clip.c |2 +- net/bridge/br_netfilter.c|2 +- net/ipv4/arp.c |6 +++--- net/ipv4/icmp.c |4 ++-- net/ipv4/igmp.c |6 +++--- net/ipv4/ip_gre.c| 10 +- net/ipv4/ip_output.c |2 +- net/ipv4/ipip.c |8 net/ipv4/ipmr.c |4 ++-- net/ipv4/ipvs/ip_vs_xmit.c |6 +++--- net/ipv4/netfilter.c |6 +++--- net/ipv4/netfilter/nf_nat_rule.c |2 +- net/ipv4/route.c |6 +++--- net/ipv4/syncookies.c|2 +- net/ipv6/ip6_tunnel.c|4 ++-- net/ipv6/sit.c |4 ++-- net/rxrpc/ar-peer.c |2 +- net/sctp/protocol.c |4 ++-- 21 files changed, 44 insertions(+), 44 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 963177e..a58ad8a 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -158,7 +158,7 @@ static void addr_send_arp(struct sockaddr_in *dst_in) memset(fl, 0, sizeof fl); fl.nl_u.ip4_u.daddr = dst_ip; - if (ip_route_output_key(rt, fl)) + if (ip_route_output_key(init_net, rt, fl)) return; neigh_event_send(rt-u.dst.neighbour, NULL); @@ -179,7 +179,7 @@ static int addr_resolve_remote(struct sockaddr_in *src_in, memset(fl, 0, sizeof fl); fl.nl_u.ip4_u.daddr = dst_ip; fl.nl_u.ip4_u.saddr = src_ip; - ret = ip_route_output_key(rt, fl); + ret = ip_route_output_key(init_net, rt, fl); if (ret) goto out; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index b0b2603..7a7be20 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2513,7 +2513,7 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave) fl.fl4_dst = targets[i]; fl.fl4_tos = RTO_ONLINK; - rv = ip_route_output_key(rt, fl); + rv = ip_route_output_key(init_net, rt, fl); if (rv) { if (net_ratelimit()) { printk(KERN_WARNING DRV_NAME diff --git a/include/net/route.h b/include/net/route.h index 
6b970d7..d9b876a 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -111,7 +111,7 @@ extern void ip_rt_redirect(__be32 old_gw, __be32 dst, __be32 new_gw, __be32 src, struct net_device *dev); extern voidrt_cache_flush(int how); extern int __ip_route_output_key(struct net *, struct rtable **, const struct flowi *flp); -extern int ip_route_output_key(struct rtable **, struct flowi *flp); +extern int ip_route_output_key(struct net *, struct rtable **, struct flowi *flp); extern int ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags); extern int ip_route_input(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin); extern unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu); diff --git a/net/atm/clip.c b/net/atm/clip.c index 45e0862..86b885e 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -534,7 +534,7 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip) unlink_clip_vcc(clip_vcc); return 0; } - error = ip_route_output_key(rt, fl); + error = ip_route_output_key(init_net, rt, fl); if (error) return error; neigh = __neigh_lookup(clip_tbl, ip, rt-u.dst.dev, 1); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 0e884fe..d4579cf 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -336,7 +336,7 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb) if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev)) goto free_skb; - if (!ip_route_output_key(rt, fl)) { + if (!ip_route_output_key(init_net, rt, fl)) { /* - Bridged-and-DNAT'ed traffic doesn't * require ip_forwarding. */ if (((struct dst_entry *)rt)-dev == dev) { diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index a44ff1a..a3cfd04 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -424,7 +424,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) int flag = 0; /*unsigned
Re: [PATCH 5/12 net-2.6.25] [NETNS]: Re-export init_net via EXPORT_SYMBOL.
Patrick McHardy wrote: Denis V. Lunev wrote: init_net is used added as a parameter to a lot of old API calls, f.e. ip_dev_find. These calls were exported as EXPORT_SYMBOL. So, export init_net as EXPORT_SYMBOL to keep networking API consistent. I think this should go in 2.6.24 if still possible so people don't have to find workarounds that will be obsolete one version later. yep, sure :) should I send this one separately for 2.6.24? -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 10/12 net-2.6.25] [NETNS]: Correct namespace for connect-time routing.
ip_route_connect and ip_route_newports are a part of routing API presented to the socket layer. The namespace is available inside them through a socket. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- include/net/route.h |8 +--- 1 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/net/route.h b/include/net/route.h index d9b876a..1985d82 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -33,6 +33,7 @@ #include linux/ip.h #include linux/cache.h #include linux/security.h +#include net/sock.h #ifndef __KERNEL__ #warning This file is not supposed to be used outside of kernel. @@ -157,8 +158,9 @@ static inline int ip_route_connect(struct rtable **rp, __be32 dst, .dport = dport } } }; int err; + struct net *net = sk-sk_net; if (!dst || !src) { - err = __ip_route_output_key(init_net, rp, fl); + err = __ip_route_output_key(net, rp, fl); if (err) return err; fl.fl4_dst = (*rp)-rt_dst; @@ -167,7 +169,7 @@ static inline int ip_route_connect(struct rtable **rp, __be32 dst, *rp = NULL; } security_sk_classify_flow(sk, fl); - return ip_route_output_flow(init_net, rp, fl, sk, flags); + return ip_route_output_flow(net, rp, fl, sk, flags); } static inline int ip_route_newports(struct rtable **rp, u8 protocol, @@ -184,7 +186,7 @@ static inline int ip_route_newports(struct rtable **rp, u8 protocol, ip_rt_put(*rp); *rp = NULL; security_sk_classify_flow(sk, fl); - return ip_route_output_flow(init_net, rp, fl, sk, 0); + return ip_route_output_flow(sk-sk_net, rp, fl, sk, 0); } return 0; } -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 11/12 net-2.6.25] [NETNS]: Routing cache virtualization.
Basically, this piece looks relatively easy. Namespace is already available on the dst entry via device and the device is safe to dereference. Compare it with that of the searcher and skip the entry if they differ. The only exception is ip_rt_frag_needed. So, add namespace parameter to it. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- include/net/route.h |2 +- net/ipv4/icmp.c |2 +- net/ipv4/route.c| 21 - 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/include/net/route.h b/include/net/route.h index 1985d82..4eabf00 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -115,7 +115,7 @@ extern int __ip_route_output_key(struct net *, struct rtable **, const struct f extern int ip_route_output_key(struct net *, struct rtable **, struct flowi *flp); extern int ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags); extern int ip_route_input(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin); -extern unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu); +extern unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, unsigned short new_mtu); extern voidip_rt_send_redirect(struct sk_buff *skb); extern unsignedinet_addr_type(struct net *net, __be32 addr); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index c04aac5..052b278 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -696,7 +696,7 @@ static void icmp_unreach(struct sk_buff *skb) and DF set.\n, NIPQUAD(iph-daddr)); } else { - info = ip_rt_frag_needed(iph, + info = ip_rt_frag_needed(init_net, iph, ntohs(icmph-un.frag.mtu)); if (!info) goto out; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 87076c6..07dd295 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -648,6 +648,11 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) (fl1-iif ^ fl2-iif)) == 0; } +static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) +{ + return rt1-u.dst.dev-nd_net 
== rt2-u.dst.dev-nd_net; +} + /* * Perform a full scan of hash table and free all entries. * Can be called by a softirq or a process. @@ -961,7 +966,7 @@ restart: spin_lock_bh(rt_hash_lock_addr(hash)); while ((rth = *rthp) != NULL) { - if (compare_keys(rth-fl, rt-fl)) { + if (compare_keys(rth-fl, rt-fl) compare_netns(rth, rt)) { /* Put it first */ *rthp = rth-u.dst.rt_next; /* @@ -1415,7 +1420,8 @@ static __inline__ unsigned short guess_mtu(unsigned short old_mtu) return 68; } -unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu) +unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, +unsigned short new_mtu) { int i; unsigned short old_mtu = ntohs(iph-tot_len); @@ -1438,7 +1444,8 @@ unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu) rth-rt_dst == daddr rth-rt_src == iph-saddr rth-fl.iif == 0 - !(dst_metric_locked(rth-u.dst, RTAX_MTU))) { + !(dst_metric_locked(rth-u.dst, RTAX_MTU)) + rth-u.dst.dev-nd_net == net) { unsigned short mtu = new_mtu; if (new_mtu 68 || new_mtu = old_mtu) { @@ -2049,7 +2056,9 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, struct rtable * rth; unsignedhash; int iif = dev-ifindex; + struct net *net; + net = skb-dev-nd_net; tos = IPTOS_RT_MASK; hash = rt_hash(daddr, saddr, iif); @@ -2061,7 +2070,8 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth-fl.iif == iif rth-fl.oif == 0 rth-fl.mark == skb-mark - rth-fl.fl4_tos == tos) { + rth-fl.fl4_tos == tos + rth-u.dst.dev-nd_net == net) { dst_use(rth-u.dst, jiffies); RT_CACHE_STAT_INC(in_hit); rcu_read_unlock(); @@ -2459,7 +2469,8 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, rth-fl.oif == flp-oif rth-fl.mark == flp-mark !((rth-fl.fl4_tos ^ flp-fl4_tos
[PATCH 12/12 net-2.6.25] [NETNS]: Add namespace for ICMP replying code.
All needed API is done, the namespace is available when required from the device on the DST entry from the incoming packet. So, just replace init_net with proper namespace. Other protocols will follow. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/icmp.c | 21 + net/ipv4/ip_output.c |2 +- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 052b278..a6c092c 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -404,7 +404,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) .tos = RT_TOS(ip_hdr(skb)-tos) } }, .proto = IPPROTO_ICMP }; security_skb_classify_flow(skb, fl); - if (ip_route_output_key(init_net, rt, fl)) + if (ip_route_output_key(rt-u.dst.dev-nd_net, rt, fl)) goto out_unlock; } if (icmpv4_xrlim_allow(rt, icmp_param-data.icmph.type, @@ -436,9 +436,11 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) struct ipcm_cookie ipc; __be32 saddr; u8 tos; + struct net *net; if (!rt) goto out; + net = rt-u.dst.dev-nd_net; /* * Find the original header. It is expected to be valid, of course. @@ -514,7 +516,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) struct net_device *dev = NULL; if (rt-fl.iif sysctl_icmp_errors_use_inbound_ifaddr) - dev = dev_get_by_index(init_net, rt-fl.iif); + dev = dev_get_by_index(net, rt-fl.iif); if (dev) { saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK); @@ -569,7 +571,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) struct rtable *rt2; security_skb_classify_flow(skb_in, fl); - if (__ip_route_output_key(init_net, rt, fl)) + if (__ip_route_output_key(net, rt, fl)) goto out_unlock; /* No need to clone since we're just using its address. 
*/ @@ -591,14 +593,14 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) if (xfrm_decode_session_reverse(skb_in, fl, AF_INET)) goto out_unlock; - if (inet_addr_type(init_net, fl.fl4_src) == RTN_LOCAL) - err = __ip_route_output_key(init_net, rt2, fl); + if (inet_addr_type(net, fl.fl4_src) == RTN_LOCAL) + err = __ip_route_output_key(net, rt2, fl); else { struct flowi fl2 = {}; struct dst_entry *odst; fl2.fl4_dst = fl.fl4_src; - if (ip_route_output_key(init_net, rt2, fl2)) + if (ip_route_output_key(net, rt2, fl2)) goto out_unlock; /* Ugh! */ @@ -666,6 +668,9 @@ static void icmp_unreach(struct sk_buff *skb) int hash, protocol; struct net_protocol *ipprot; u32 info = 0; + struct net *net; + + net = skb-dst-dev-nd_net; /* * Incomplete header ? @@ -696,7 +701,7 @@ static void icmp_unreach(struct sk_buff *skb) and DF set.\n, NIPQUAD(iph-daddr)); } else { - info = ip_rt_frag_needed(init_net, iph, + info = ip_rt_frag_needed(net, iph, ntohs(icmph-un.frag.mtu)); if (!info) goto out; @@ -734,7 +739,7 @@ static void icmp_unreach(struct sk_buff *skb) */ if (!sysctl_icmp_ignore_bogus_error_responses - inet_addr_type(init_net, iph-daddr) == RTN_BROADCAST) { + inet_addr_type(net, iph-daddr) == RTN_BROADCAST) { if (net_ratelimit()) printk(KERN_WARNING %u.%u.%u.%u sent an invalid ICMP type %u, code %u diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 6a5b839..4fad239 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1377,7 +1377,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar .dport = tcp_hdr(skb)-source } }, .proto = sk-sk_protocol }; security_skb_classify_flow(skb, fl); - if (ip_route_output_key(init_net, rt, fl)) + if (ip_route_output_key(sk-sk_net, rt, fl
[PATCH 0/6 net-2.6.25] Provide correct namespace on IPv4 packet input path.
This patchset sequentially adds namespace parameter to fib_lookup and inetdev_by_index. After that it is possible to pass network namespace from input packet to routing engine. Output path is much more intrusive and will be sent separately. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/6 net-2.6.25] [NETNS] Pass correct namespace in fib_validate_source.
Correct network namespace is available inside fib_validate_source. It can be obtained from the device passed in. The device is not NULL as in_device is obtained from it just above. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/fib_frontend.c |6 -- 1 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index dcd3a28..39b8b35 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -243,6 +243,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, struct fib_result res; int no_addr, rpf; int ret; + struct net *net; no_addr = rpf = 0; rcu_read_lock(); @@ -256,7 +257,8 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, if (in_dev == NULL) goto e_inval; - if (fib_lookup(init_net, fl, res)) + net = dev-nd_net; + if (fib_lookup(net, fl, res)) goto last_resort; if (res.type != RTN_UNICAST) goto e_inval_res; @@ -280,7 +282,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, fl.oif = dev-ifindex; ret = 0; - if (fib_lookup(init_net, fl, res) == 0) { + if (fib_lookup(net, fl, res) == 0) { if (res.type == RTN_UNICAST) { *spec_dst = FIB_RES_PREFSRC(res); ret = FIB_RES_NH(res).nh_scope = RT_SCOPE_HOST; -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 5/6 net-2.6.25] [NETNS] Pass correct namespace in ip_route_input_slow.
The packet on the input path always has a reference to the input network device it is passed from. Extract the network namespace from it. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/route.c |7 --- 1 files changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c107bc3..b3c6122 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1881,6 +1881,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, __be32 spec_dst; int err = -EINVAL; int free_res = 0; + struct net* net = dev-nd_net; /* IP on this device is disabled. */ @@ -1911,7 +1912,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, /* * Now we are ready to route packet. */ - if ((err = fib_lookup(init_net, fl, res)) != 0) { + if ((err = fib_lookup(net, fl, res)) != 0) { if (!IN_DEV_FORWARD(in_dev)) goto e_hostunreach; goto no_route; @@ -1926,7 +1927,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (res.type == RTN_LOCAL) { int result; result = fib_validate_source(saddr, daddr, tos, -init_net.loopback_dev-ifindex, +net-loopback_dev-ifindex, dev, spec_dst, itag); if (result 0) goto martian_source; @@ -1988,7 +1989,7 @@ local_input: #endif rth-rt_iif = rth-fl.iif = dev-ifindex; - rth-u.dst.dev = init_net.loopback_dev; + rth-u.dst.dev = net-loopback_dev; dev_hold(rth-u.dst.dev); rth-idev = in_dev_get(rth-u.dst.dev); rth-rt_gateway = daddr; -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 4/6 net-2.6.25] [NETNS] Pass correct namespace in context fib_check_nh.
Correct network namespace is already used in fib_check_nh. Re-work its usage for better readability and pass into fib_lookup inetdev_by_index. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/fib_semantics.c | 12 ++-- 1 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 8b47e11..c791286 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -519,7 +519,9 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, struct fib_nh *nh) { int err; + struct net *net; + net = cfg-fc_nlinfo.nl_net; if (nh-nh_gw) { struct fib_result res; @@ -532,11 +534,9 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, if (cfg-fc_scope = RT_SCOPE_LINK) return -EINVAL; - if (inet_addr_type(cfg-fc_nlinfo.nl_net, - nh-nh_gw) != RTN_UNICAST) + if (inet_addr_type(net, nh-nh_gw) != RTN_UNICAST) return -EINVAL; - if ((dev = __dev_get_by_index(cfg-fc_nlinfo.nl_net, - nh-nh_oif)) == NULL) + if ((dev = __dev_get_by_index(net, nh-nh_oif)) == NULL) return -ENODEV; if (!(dev-flagsIFF_UP)) return -ENETDOWN; @@ -559,7 +559,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, /* It is not necessary, but requires a bit of thinking */ if (fl.fl4_scope RT_SCOPE_LINK) fl.fl4_scope = RT_SCOPE_LINK; - if ((err = fib_lookup(init_net, fl, res)) != 0) + if ((err = fib_lookup(net, fl, res)) != 0) return err; } err = -EINVAL; @@ -583,7 +583,7 @@ out: if (nh-nh_flags(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) return -EINVAL; - in_dev = inetdev_by_index(init_net, nh-nh_oif); + in_dev = inetdev_by_index(net, nh-nh_oif); if (in_dev == NULL) return -ENODEV; if (!(in_dev-dev-flagsIFF_UP)) { -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 6/6 net-2.6.25] [NETNS] Pass correct namespace in ip_rt_get_source.
ip_rt_get_source is the infamous place for which dst_ifdown kludges have been implemented. This means that rt->u.dst.dev can be safely dereferenced to obtain nd_net. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- net/ipv4/route.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b3c6122..ede0571 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1559,7 +1559,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) if (rt-fl.iif == 0) src = rt-rt_src; - else if (fib_lookup(init_net, rt-fl, res) == 0) { + else if (fib_lookup(rt-u.dst.dev-nd_net, rt-fl, res) == 0) { src = FIB_RES_PREFSRC(res); fib_res_put(res); } else -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/6 net-2.6.25] [NETNS] Add netns parameter to fib_lookup.
Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- include/net/ip_fib.h |9 + net/ipv4/fib_frontend.c |4 ++-- net/ipv4/fib_rules.c |4 ++-- net/ipv4/fib_semantics.c |2 +- net/ipv4/route.c |6 +++--- 5 files changed, 13 insertions(+), 12 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 08ebb1e..9daa60b 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -178,15 +178,16 @@ static inline struct fib_table *fib_new_table(struct net *net, u32 id) return fib_get_table(net, id); } -static inline int fib_lookup(const struct flowi *flp, struct fib_result *res) +static inline int fib_lookup(struct net *net, const struct flowi *flp, +struct fib_result *res) { struct fib_table *table; - table = fib_get_table(init_net, RT_TABLE_LOCAL); + table = fib_get_table(net, RT_TABLE_LOCAL); if (!table-tb_lookup(table, flp, res)) return 0; - table = fib_get_table(init_net, RT_TABLE_MAIN); + table = fib_get_table(net, RT_TABLE_MAIN); if (!table-tb_lookup(table, flp, res)) return 0; return -ENETUNREACH; @@ -200,7 +201,7 @@ extern void __net_exit fib4_rules_exit(struct net *net); extern u32 fib_rules_tclass(struct fib_result *res); #endif -extern int fib_lookup(struct flowi *flp, struct fib_result *res); +extern int fib_lookup(struct net *n, struct flowi *flp, struct fib_result *res); extern struct fib_table *fib_new_table(struct net *net, u32 id); extern struct fib_table *fib_get_table(struct net *net, u32 id); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 8c0081c..dcd3a28 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -256,7 +256,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, if (in_dev == NULL) goto e_inval; - if (fib_lookup(fl, res)) + if (fib_lookup(init_net, fl, res)) goto last_resort; if (res.type != RTN_UNICAST) goto e_inval_res; @@ -280,7 +280,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, fl.oif = dev-ifindex; ret = 0; - if (fib_lookup(fl, res) == 0) { + 
if (fib_lookup(init_net, fl, res) == 0) { if (res.type == RTN_UNICAST) { *spec_dst = FIB_RES_PREFSRC(res); ret = FIB_RES_NH(res).nh_scope = RT_SCOPE_HOST; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 2b43002..19274d0 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -54,14 +54,14 @@ u32 fib_rules_tclass(struct fib_result *res) } #endif -int fib_lookup(struct flowi *flp, struct fib_result *res) +int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res) { struct fib_lookup_arg arg = { .result = res, }; int err; - err = fib_rules_lookup(init_net.ipv4.rules_ops, flp, 0, arg); + err = fib_rules_lookup(net-ipv4.rules_ops, flp, 0, arg); res-r = arg.rule; return err; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 0e08df4..ecd91c6 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -559,7 +559,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, /* It is not necessary, but requires a bit of thinking */ if (fl.fl4_scope RT_SCOPE_LINK) fl.fl4_scope = RT_SCOPE_LINK; - if ((err = fib_lookup(fl, res)) != 0) + if ((err = fib_lookup(init_net, fl, res)) != 0) return err; } err = -EINVAL; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 162e738..c107bc3 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1559,7 +1559,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) if (rt-fl.iif == 0) src = rt-rt_src; - else if (fib_lookup(rt-fl, res) == 0) { + else if (fib_lookup(init_net, rt-fl, res) == 0) { src = FIB_RES_PREFSRC(res); fib_res_put(res); } else @@ -1911,7 +1911,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, /* * Now we are ready to route packet. 
*/ - if ((err = fib_lookup(fl, res)) != 0) { + if ((err = fib_lookup(init_net, fl, res)) != 0) { if (!IN_DEV_FORWARD(in_dev)) goto e_hostunreach; goto no_route; @@ -2363,7 +2363,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) goto make_route; } - if (fib_lookup(fl, res)) { + if (fib_lookup(init_net, fl, res)) { res.fi = NULL; if (oldflp-oif
[PATCH 2/6 net-2.6.25] [NETNS] Add netns parameter to inetdev_by_index.
Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] --- include/linux/inetdevice.h |2 +- net/ipv4/devinet.c |6 +++--- net/ipv4/fib_semantics.c |2 +- net/ipv4/igmp.c|4 ++-- net/ipv4/ip_gre.c |3 ++- 5 files changed, 9 insertions(+), 8 deletions(-) diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 45f3731..e74a2ee 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -133,7 +133,7 @@ extern struct net_device*ip_dev_find(__be32 addr); extern int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b); extern int devinet_ioctl(unsigned int cmd, void __user *); extern voiddevinet_init(void); -extern struct in_device*inetdev_by_index(int); +extern struct in_device*inetdev_by_index(struct net *, int); extern __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope); extern __be32 inet_confirm_addr(struct in_device *in_dev, __be32 dst, __be32 local, int scope); extern struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, __be32 mask); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index e381edb..21f71bf 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -409,12 +409,12 @@ static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) return inet_insert_ifa(ifa); } -struct in_device *inetdev_by_index(int ifindex) +struct in_device *inetdev_by_index(struct net *net, int ifindex) { struct net_device *dev; struct in_device *in_dev = NULL; read_lock(dev_base_lock); - dev = __dev_get_by_index(init_net, ifindex); + dev = __dev_get_by_index(net, ifindex); if (dev) in_dev = in_dev_get(dev); read_unlock(dev_base_lock); @@ -454,7 +454,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg goto errout; ifm = nlmsg_data(nlh); - in_dev = inetdev_by_index(ifm-ifa_index); + in_dev = inetdev_by_index(net, ifm-ifa_index); if (in_dev == NULL) { err = -ENODEV; goto errout; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 
ecd91c6..8b47e11 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -583,7 +583,7 @@ out: if (nh-nh_flags(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) return -EINVAL; - in_dev = inetdev_by_index(nh-nh_oif); + in_dev = inetdev_by_index(init_net, nh-nh_oif); if (in_dev == NULL) return -ENODEV; if (!(in_dev-dev-flagsIFF_UP)) { diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 285d262..b4df39a 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1389,7 +1389,7 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr) struct in_device *idev = NULL; if (imr-imr_ifindex) { - idev = inetdev_by_index(imr-imr_ifindex); + idev = inetdev_by_index(init_net, imr-imr_ifindex); if (idev) __in_dev_put(idev); return idev; @@ -,7 +,7 @@ void ip_mc_drop_socket(struct sock *sk) struct in_device *in_dev; inet-mc_list = iml-next; - in_dev = inetdev_by_index(iml-multi.imr_ifindex); + in_dev = inetdev_by_index(init_net, iml-multi.imr_ifindex); (void) ip_mc_leave_src(sk, iml, in_dev); if (in_dev != NULL) { ip_mc_dec_group(in_dev, iml-multi.imr_multiaddr.s_addr); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 8b81deb..a74983d 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1193,7 +1193,8 @@ static int ipgre_close(struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); if (ipv4_is_multicast(t-parms.iph.daddr) t-mlink) { - struct in_device *in_dev = inetdev_by_index(t-mlink); + struct in_device *in_dev; + in_dev = inetdev_by_index(dev-nd_net, t-mlink); if (in_dev) { ip_mc_dec_group(in_dev, t-parms.iph.daddr); in_dev_put(in_dev); -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH ] [NETNS 3/4 net-2.6.25] Consolidate kernel netlink socket destruction.
Create a specific helper for netlink kernel socket disposal. This just let the code look better and provides a ground for proper disposal inside a namespace. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] Tested-by: Alexey Dobriyan [EMAIL PROTECTED] --- drivers/connector/connector.c |9 +++-- drivers/scsi/scsi_netlink.c |2 +- drivers/scsi/scsi_transport_iscsi.c |2 +- fs/ecryptfs/netlink.c |3 +-- include/linux/netlink.h |1 + net/bridge/netfilter/ebt_ulog.c |4 ++-- net/core/rtnetlink.c|2 +- net/decnet/netfilter/dn_rtmsg.c |4 ++-- net/ipv4/fib_frontend.c |2 +- net/ipv4/inet_diag.c|2 +- net/ipv4/netfilter/ip_queue.c |4 ++-- net/ipv4/netfilter/ipt_ULOG.c |4 ++-- net/ipv6/netfilter/ip6_queue.c |4 ++-- net/netfilter/nfnetlink.c |2 +- net/netlink/af_netlink.c| 11 +++ net/xfrm/xfrm_user.c|2 +- 16 files changed, 33 insertions(+), 25 deletions(-) diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index 37976dc..fea2d3e 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -420,8 +420,7 @@ static int __devinit cn_init(void) dev-cbdev = cn_queue_alloc_dev(cqueue, dev-nls); if (!dev-cbdev) { - if (dev-nls-sk_socket) - sock_release(dev-nls-sk_socket); + netlink_kernel_release(dev-nls); return -EINVAL; } @@ -431,8 +430,7 @@ static int __devinit cn_init(void) if (err) { cn_already_initialized = 0; cn_queue_free_dev(dev-cbdev); - if (dev-nls-sk_socket) - sock_release(dev-nls-sk_socket); + netlink_kernel_release(dev-nls); return -EINVAL; } @@ -447,8 +445,7 @@ static void __devexit cn_fini(void) cn_del_callback(dev-id); cn_queue_free_dev(dev-cbdev); - if (dev-nls-sk_socket) - sock_release(dev-nls-sk_socket); + netlink_kernel_release(dev-nls); } subsys_initcall(cn_init); diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c index 40579ed..fe48c24 100644 --- a/drivers/scsi/scsi_netlink.c +++ b/drivers/scsi/scsi_netlink.c @@ -169,7 +169,7 @@ void scsi_netlink_exit(void) { if (scsi_nl_sock) { - 
sock_release(scsi_nl_sock-sk_socket); + netlink_kernel_release(scsi_nl_sock); netlink_unregister_notifier(scsi_netlink_notifier); } diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 5428d15..9e463a6 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -1533,7 +1533,7 @@ unregister_transport_class: static void __exit iscsi_transport_exit(void) { - sock_release(nls-sk_socket); + netlink_kernel_release(nls); transport_class_unregister(iscsi_connection_class); transport_class_unregister(iscsi_session_class); transport_class_unregister(iscsi_host_class); diff --git a/fs/ecryptfs/netlink.c b/fs/ecryptfs/netlink.c index 9aa3451..f638a69 100644 --- a/fs/ecryptfs/netlink.c +++ b/fs/ecryptfs/netlink.c @@ -237,7 +237,6 @@ out: */ void ecryptfs_release_netlink(void) { - if (ecryptfs_nl_sock ecryptfs_nl_sock-sk_socket) - sock_release(ecryptfs_nl_sock-sk_socket); + netlink_kernel_release(ecryptfs_nl_sock); ecryptfs_nl_sock = NULL; } diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 2aee0f5..bd13b6f 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -178,6 +178,7 @@ extern struct sock *netlink_kernel_create(struct net *net, void (*input)(struct sk_buff *skb), struct mutex *cb_mutex, struct module *module); +extern void netlink_kernel_release(struct sock *sk); extern int netlink_change_ngroups(struct sock *sk, unsigned int groups); extern void netlink_clear_multicast_users(struct sock *sk, unsigned int group); extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index b73ba28..8e7b00b 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -307,7 +307,7 @@ static int __init ebt_ulog_init(void) if (!ebtulognl) ret = -ENOMEM; else if ((ret = ebt_register_watcher(ulog))) - sock_release(ebtulognl-sk_socket); + 
netlink_kernel_release(ebtulognl); if (ret == 0) nf_log_register(PF_BRIDGE, ebt_ulog_logger); @@ -333,7 +333,7 @@ static void __exit ebt_ulog_fini(void
[PATCH] [NETNS 4/4 net-2.6.25] Namespace stop vs 'ip r l' race.
During the network namespace stop process, kernel-side netlink sockets belonging to a namespace should be closed. They should not prevent the namespace from stopping, so they do not increment the namespace usage counter. However, this counter will be put during the last sock_put. Replacing the correct netns with init_net solves the problem only partially, as the socket to be stopped remains, until it is properly stopped, a valid netlink kernel socket and can be looked up by user processes. This is not a problem as long as it resides in its original namespace (no processes inside this net), but this is not true for init_net. So, hold a reference to the socket, remove it from the lookup tables and only after that change the namespace and perform the last put. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] Tested-by: Alexey Dobriyan [EMAIL PROTECTED] --- net/core/rtnetlink.c | 15 ++- net/ipv4/fib_frontend.c |7 +-- net/netlink/af_netlink.c | 15 +++ 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2ef9480..aafc34d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1365,25 +1365,14 @@ static int rtnetlink_net_init(struct net *net) rtnetlink_rcv, rtnl_mutex, THIS_MODULE); if (!sk) return -ENOMEM; - - /* Don't hold an extra reference on the namespace */ - put_net(sk-sk_net); net-rtnl = sk; return 0; } static void rtnetlink_net_exit(struct net *net) { - struct sock *sk = net-rtnl; - if (sk) { - /* At the last minute lie and say this is a socket for the -* initial network namespace. So the socket will be safe to -* free. 
-*/ - sk-sk_net = get_net(init_net); - netlink_kernel_release(net-rtnl); - net-rtnl = NULL; - } + netlink_kernel_release(net-rtnl); + net-rtnl = NULL; } static struct pernet_operations rtnetlink_net_ops = { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index e787d21..62bd791 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -869,19 +869,14 @@ static int nl_fib_lookup_init(struct net *net) nl_fib_input, NULL, THIS_MODULE); if (sk == NULL) return -EAFNOSUPPORT; - /* Don't hold an extra reference on the namespace */ - put_net(sk-sk_net); net-ipv4.fibnl = sk; return 0; } static void nl_fib_lookup_exit(struct net *net) { - /* At the last minute lie and say this is a socket for the -* initial network namespace. So the socket will be safe to free. -*/ - net-ipv4.fibnl-sk_net = get_net(init_net); netlink_kernel_release(net-ipv4.fibnl); + net-ipv4.fibnl = NULL; } static void fib_disable_ip(struct net_device *dev, int force) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 626a582..6b178e1 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1396,6 +1396,9 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups, } netlink_table_ungrab(); + /* Do not hold an extra referrence to a namespace as this socket is +* internal to a namespace and does not prevent it to stop. */ + put_net(net); return sk; out_sock_release: @@ -1411,7 +1414,19 @@ netlink_kernel_release(struct sock *sk) { if (sk == NULL || sk-sk_socket == NULL) return; + + /* +* Last sock_put should drop referrence to sk-sk_net. It has already +* been dropped in netlink_kernel_create. Taking referrence to stopping +* namespace is not an option. +* Take referrence to a socket to remove it from netlink lookup table +* _alive_ and after that destroy it in the context of init_net. 
+*/ + sock_hold(sk); sock_release(sk-sk_socket); + + sk-sk_net = get_net(init_net); + sock_put(sk); } EXPORT_SYMBOL(netlink_kernel_release); -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] [NETNS 2/4 net-2.6.25] Memory leak on network namespace stop.
A network namespace allocates 2 kernel netlink sockets, fibnl and rtnl. These sockets should be disposed of properly, i.e. by sock_release. Plain sock_put is not enough. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] Tested-by: Alexey Dobriyan [EMAIL PROTECTED] --- net/core/rtnetlink.c|2 +- net/ipv4/fib_frontend.c |2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 4a07e83..2c1f665 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1381,7 +1381,7 @@ static void rtnetlink_net_exit(struct net *net) * free. */ sk-sk_net = get_net(init_net); - sock_put(sk); + sock_release(net-rtnl-sk_socket); net-rtnl = NULL; } } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 8ddcd3f..4e5216e 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -881,7 +881,7 @@ static void nl_fib_lookup_exit(struct net *net) * initial network namespace. So the socket will be safe to free. */ net-ipv4.fibnl-sk_net = get_net(init_net); - sock_put(net-ipv4.fibnl); + sock_release(net-ipv4.fibnl-sk_socket); } static void fib_disable_ip(struct net_device *dev, int force) -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/4 net-2.6.25] Proper netlink kernel sockets disposal.
Alexey Dobriyan found that virtualized netlink kernel sockets (fibnl and rtnl) are leaked during a namespace start/stop loop. The leak fix (simple and obvious) reveals that netlink kernel socket disposal leads to OOPSes: - nl_table[protocol]->listeners is double freed - sometimes during namespace stop the netlink_sock_destruct BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); is hit. This set addresses all these issues. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] Tested-by: Alexey Dobriyan [EMAIL PROTECTED] -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] [NETNS 1/4 net-2.6.25] Double free in netlink_release.
Netlink protocol table is global for all namespaces. Some netlink protocols have been virtualized, i.e. they have per/namespace netlink socket. This difference can easily lead to double free if more than 1 namespace is started. Count the number of kernel netlink sockets to track that this table is not used any more. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] Tested-by: Alexey Dobriyan [EMAIL PROTECTED] --- net/netlink/af_netlink.c | 10 +++--- 1 files changed, 7 insertions(+), 3 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 21f9e30..29fef55 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -498,9 +498,12 @@ static int netlink_release(struct socket *sock) netlink_table_grab(); if (netlink_is_kernel(sk)) { - kfree(nl_table[sk-sk_protocol].listeners); - nl_table[sk-sk_protocol].module = NULL; - nl_table[sk-sk_protocol].registered = 0; + BUG_ON(nl_table[sk-sk_protocol].registered == 0); + if (--nl_table[sk-sk_protocol].registered == 0) { + kfree(nl_table[sk-sk_protocol].listeners); + nl_table[sk-sk_protocol].module = NULL; + nl_table[sk-sk_protocol].registered = 0; + } } else if (nlk-subscriptions) netlink_update_listeners(sk); netlink_table_ungrab(); @@ -1389,6 +1392,7 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups, nl_table[unit].registered = 1; } else { kfree(listeners); + nl_table[unit].registered++; } netlink_table_ungrab(); -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/3 net-2.6.25] call FIB rule-action in the correct namespace
FIB rule-action should operate in the same namespace as fib_lookup. This is definitely missing right now. There are two ways to implement this: pass struct net into another rules API call (2 levels) or place netns into the rule struct directly. The second approach seems better as the code will grow less. Additionally, the patchset removes the struct net argument from fib_rules_register/unregister so that the network namespace context is available at the time of default rules creation. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/3 net-2.6.25] Process FIB rule action in the context of the namespace.
Save namespace context on the fib rule at the rule creation time and call routing lookup in the correct namespace. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] Acked-by: Daniel Lezcano [EMAIL PROTECTED] --- include/net/fib_rules.h |1 + net/core/fib_rules.c|2 ++ net/ipv4/fib_rules.c|2 +- 3 files changed, 4 insertions(+), 1 deletions(-) diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 7f9f4ae..34349f9 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -22,6 +22,7 @@ struct fib_rule u32 target; struct fib_rule * ctarget; struct rcu_head rcu; + struct net *fr_net; }; struct fib_lookup_arg diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 3cd4f13..42ccaf5 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -29,6 +29,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops, r-pref = pref; r-table = table; r-flags = flags; + r-fr_net = ops-fro_net; /* The lock is not required here, the list in unreacheable * at the moment this function is called */ @@ -242,6 +243,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) err = -ENOMEM; goto errout; } + rule-fr_net = net; if (tb[FRA_PRIORITY]) rule-pref = nla_get_u32(tb[FRA_PRIORITY]); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 3b7affd..d2001f1 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -91,7 +91,7 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, goto errout; } - if ((tbl = fib_get_table(init_net, rule-table)) == NULL) + if ((tbl = fib_get_table(rule-fr_net, rule-table)) == NULL) goto errout; err = tbl-tb_lookup(tbl, flp, (struct fib_result *) arg-result); -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/3 net-2.6.25] Add netns to fib_rules_ops.
The backward link from FIB rules operations to the network namespace will allow to simplify the API a bit. Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] Acked-by: Daniel Lezcano [EMAIL PROTECTED] --- include/net/fib_rules.h |1 + net/decnet/dn_rules.c |1 + net/ipv4/fib_rules.c|2 ++ net/ipv6/fib6_rules.c |1 + 4 files changed, 5 insertions(+), 0 deletions(-) diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 4f47250..6910e01 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -67,6 +67,7 @@ struct fib_rules_ops const struct nla_policy *policy; struct list_headrules_list; struct module *owner; + struct net *fro_net; }; #define FRA_GENERIC_POLICY \ diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index c1fae23..964e658 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -249,6 +249,7 @@ static struct fib_rules_ops dn_fib_rules_ops = { .policy = dn_fib_rule_policy, .rules_list = LIST_HEAD_INIT(dn_fib_rules_ops.rules_list), .owner = THIS_MODULE, + .fro_net= init_net, }; void __init dn_fib_rules_init(void) diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 72232ab..8d0ebe7 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -315,6 +315,8 @@ int __net_init fib4_rules_init(struct net *net) if (ops == NULL) return -ENOMEM; INIT_LIST_HEAD(ops-rules_list); + ops-fro_net = net; + fib_rules_register(net, ops); err = fib_default_rules_init(ops); diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 76437a1..ead5ab2 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -249,6 +249,7 @@ static struct fib_rules_ops fib6_rules_ops = { .policy = fib6_rule_policy, .rules_list = LIST_HEAD_INIT(fib6_rules_ops.rules_list), .owner = THIS_MODULE, + .fro_net= init_net, }; static int __init fib6_default_rules_init(void) -- 1.5.3.rc5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at 
http://vger.kernel.org/majordomo-info.html
[PATCH 2/3 net-2.6.25] [NETNS] FIB rules API cleanup.
Remove struct net from fib_rules_register(unregister)/notify_change paths and diet code size a bit. add/remove: 0/0 grow/shrink: 10/12 up/down: 35/-100 (-65) function old new delta notify_rule_change 273 280 +7 trie_show_stats 471 475 +4 fn_trie_delete 473 477 +4 fib_rules_unregister 144 148 +4 fib4_rule_compare119 123 +4 resize 28422845 +3 fn_trie_select_default 515 518 +3 inet_sk_rebuild_header 836 838 +2 fib_trie_seq_show764 766 +2 __devinet_sysctl_register276 278 +2 fn_trie_lookup 11241123 -1 ip_fib_check_default 133 131 -2 devinet_conf_sysctl 223 221 -2 snmp_fold_field 126 123 -3 fn_trie_insert 20912086 -5 inet_create 876 870 -6 fib4_rules_init 197 191 -6 fib_sync_down452 444 -8 inet_gso_send_check 334 325 -9 fib_create_info 30032991 -12 fib_nl_delrule 568 553 -15 fib_nl_newrule 883 852 -31 Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] Acked-by: Daniel Lezcano [EMAIL PROTECTED] --- include/net/fib_rules.h |4 ++-- net/core/fib_rules.c| 20 +--- net/decnet/dn_rules.c |4 ++-- net/ipv4/fib_rules.c|6 +++--- net/ipv6/fib6_rules.c |4 ++-- 5 files changed, 22 insertions(+), 16 deletions(-) diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 6910e01..7f9f4ae 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -102,8 +102,8 @@ static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla) return frh-table; } -extern int fib_rules_register(struct net *, struct fib_rules_ops *); -extern void fib_rules_unregister(struct net *, struct fib_rules_ops *); +extern int fib_rules_register(struct fib_rules_ops *); +extern void fib_rules_unregister(struct fib_rules_ops *); extern void fib_rules_cleanup_ops(struct fib_rules_ops *); extern int fib_rules_lookup(struct fib_rules_ops *, diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 541728a..3cd4f13 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -37,8 +37,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops, } EXPORT_SYMBOL(fib_default_rule_add); 
-static void notify_rule_change(struct net *net, int event, - struct fib_rule *rule, +static void notify_rule_change(int event, struct fib_rule *rule, struct fib_rules_ops *ops, struct nlmsghdr *nlh, u32 pid); @@ -72,10 +71,13 @@ static void flush_route_cache(struct fib_rules_ops *ops) ops-flush_cache(); } -int fib_rules_register(struct net *net, struct fib_rules_ops *ops) +int fib_rules_register(struct fib_rules_ops *ops) { int err = -EEXIST; struct fib_rules_ops *o; + struct net *net; + + net = ops-fro_net; if (ops-rule_size sizeof(struct fib_rule)) return -EINVAL; @@ -112,8 +114,9 @@ void fib_rules_cleanup_ops(struct fib_rules_ops *ops) } EXPORT_SYMBOL_GPL(fib_rules_cleanup_ops); -void fib_rules_unregister(struct net *net, struct fib_rules_ops *ops) +void fib_rules_unregister(struct fib_rules_ops *ops) { + struct net *net = ops-fro_net; spin_lock(net-rules_mod_lock); list_del_rcu(ops-list); @@ -333,7 +336,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) else list_add_rcu(rule-list, ops-rules_list); - notify_rule_change(net, RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid); + notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid); flush_route_cache(ops); rules_ops_put(ops); return 0; @@ -423,7 +426,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) } synchronize_rcu(); - notify_rule_change(net, RTM_DELRULE, rule, ops, nlh, + notify_rule_change(RTM_DELRULE, rule, ops, nlh, NETLINK_CB(skb).pid); fib_rule_put(rule); flush_route_cache(ops); @@ -561,13 +564,15 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb