Re: [PATCH 6/17 net-2.6.26] [NETNS]: Default arp parameters lookup.

2008-02-19 Thread Denis V. Lunev
On Tue, 2008-02-19 at 10:14 +0100, Daniel Lezcano wrote:
 Denis V. Lunev wrote:
  Default ARP parameters should be findable regardless of the context.
  Required to make inetdev_event working.
  
  Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
  ---
   net/core/neighbour.c |4 +---
   1 files changed, 1 insertions(+), 3 deletions(-)
  
  diff --git a/net/core/neighbour.c b/net/core/neighbour.c
  index c895ad4..45ed620 100644
  --- a/net/core/neighbour.c
  +++ b/net/core/neighbour.c
  @@ -1275,9 +1275,7 @@ static inline struct neigh_parms 
  *lookup_neigh_params(struct neigh_table *tbl,
  struct neigh_parms *p;
  
  for (p = tbl-parms; p; p = p-next) {
  -   if (p-net != net)
  -   continue;
  -   if ((p-dev  p-dev-ifindex == ifindex) ||
  +   if ((p-dev  p-dev-ifindex == ifindex  p-net == net) ||
  (!p-dev  !ifindex))
  return p;
  }
 
 If the values are:
   p-dev == NULL
   ifindex == 0
   p-net != net
 
 The parms should not be taken into account and the looping must 
 continue. But with this modification it is not the case, if we specify 
 parms ifindex == 0, the first parms with the dev field set to NULL will 
 be taken belonging or not to the right net.

They should be taken. Otherwise inetdev_event will certainly fail
partway through. You can check this yourself.

These are ARP defaults and I do not see a problem for now to get them.

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 6/17 net-2.6.26] [NETNS]: Default arp parameters lookup.

2008-02-19 Thread Denis V. Lunev
On Tue, 2008-02-19 at 10:51 +0100, Daniel Lezcano wrote:
 Denis V. Lunev wrote:
  On Tue, 2008-02-19 at 10:14 +0100, Daniel Lezcano wrote:
  Denis V. Lunev wrote:
  Default ARP parameters should be findable regardless of the context.
  Required to make inetdev_event working.
 
  Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
  ---
   net/core/neighbour.c |4 +---
   1 files changed, 1 insertions(+), 3 deletions(-)
 
  diff --git a/net/core/neighbour.c b/net/core/neighbour.c
  index c895ad4..45ed620 100644
  --- a/net/core/neighbour.c
  +++ b/net/core/neighbour.c
  @@ -1275,9 +1275,7 @@ static inline struct neigh_parms 
  *lookup_neigh_params(struct neigh_table *tbl,
struct neigh_parms *p;
 
for (p = tbl-parms; p; p = p-next) {
  - if (p-net != net)
  - continue;
  - if ((p-dev  p-dev-ifindex == ifindex) ||
  + if ((p-dev  p-dev-ifindex == ifindex  p-net == net) ||
(!p-dev  !ifindex))
return p;
}
  If the values are:
 p-dev == NULL
 ifindex == 0
 p-net != net
 
  The parms should not be taken into account and the looping must 
  continue. But with this modification it is not the case, if we specify 
  parms ifindex == 0, the first parms with the dev field set to NULL will 
  be taken belonging or not to the right net.
  
  They should be taken. In the other case inetdev_event will fail for sure
  in the middle. You could check.
  
  These are ARP defaults and I do not see a problem for now to get them.
 
 Because there is a parms default per namespace. So several instances of 
 them per nd table. That was the initial approach with Eric's patchset.
 

These changes are not in mainline, and I do not want to touch ARP as
this is not a simple thing. In reality ARP will be needed only when
we have a real device inside a namespace.

Right now I prefer to have a minimal set of working changes to finish IP
and the upper layers.

Regards,
Den

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] [NETNS]: Namespace leak in pneigh_lookup.

2008-02-19 Thread Denis V. Lunev
release_net is missing on the error path in pneigh_lookup.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/core/neighbour.c |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 7bb6a9a..174e29e 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -507,6 +507,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
if (tbl-pconstructor  tbl-pconstructor(n)) {
if (dev)
dev_put(dev);
+   release_net(net);
kfree(n);
n = NULL;
goto out;
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 10/17 net-2.6.26] [NETNS]: Process ip_rt_redirect in the correct namespace.

2008-02-18 Thread Denis V. Lunev
Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/route.c |7 +--
 1 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 525787b..44708ab 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1132,10 +1132,12 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 
new_gw,
__be32  skeys[2] = { saddr, 0 };
int  ikeys[2] = { dev-ifindex, 0 };
struct netevent_redirect netevent;
+   struct net *net;
 
if (!in_dev)
return;
 
+   net = dev-nd_net;
if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev)
|| ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw)
|| ipv4_is_zeronet(new_gw))
@@ -1147,7 +1149,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 
new_gw,
if (IN_DEV_SEC_REDIRECTS(in_dev)  
ip_fib_check_default(new_gw, dev))
goto reject_redirect;
} else {
-   if (inet_addr_type(init_net, new_gw) != RTN_UNICAST)
+   if (inet_addr_type(net, new_gw) != RTN_UNICAST)
goto reject_redirect;
}
 
@@ -1165,7 +1167,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 
new_gw,
rth-fl.fl4_src != skeys[i] ||
rth-fl.oif != ikeys[k] ||
rth-fl.iif != 0 ||
-   rth-rt_genid != atomic_read(rt_genid)) {
+   rth-rt_genid != atomic_read(rt_genid) ||
+   rth-u.dst.dev-nd_net != net) {
rthp = rth-u.dst.rt_next;
continue;
}
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 12/17 net-2.6.26] [NETNS]: Process /proc/net/rt_cache inside a namespace.

2008-02-18 Thread Denis V. Lunev
Show routing cache for a particular namespace only.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/route.c |   10 +++---
 1 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 67df872..c11e6bf 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -273,6 +273,7 @@ static unsigned int rt_hash_code(u32 daddr, u32 saddr)
 
 #ifdef CONFIG_PROC_FS
 struct rt_cache_iter_state {
+   struct seq_net_private p;
int bucket;
int genid;
 };
@@ -285,7 +286,8 @@ static struct rtable *rt_cache_get_first(struct 
rt_cache_iter_state *st)
rcu_read_lock_bh();
r = rcu_dereference(rt_hash_table[st-bucket].chain);
while (r) {
-   if (r-rt_genid == st-genid)
+   if (r-u.dst.dev-nd_net == st-p.net 
+   r-rt_genid == st-genid)
return r;
r = rcu_dereference(r-u.dst.rt_next);
}
@@ -312,6 +314,8 @@ static struct rtable *rt_cache_get_next(struct 
rt_cache_iter_state *st,
struct rtable *r)
 {
while ((r = __rt_cache_get_next(st, r)) != NULL) {
+   if (r-u.dst.dev-nd_net != st-p.net)
+   continue;
if (r-rt_genid == st-genid)
break;
}
@@ -398,7 +402,7 @@ static const struct seq_operations rt_cache_seq_ops = {
 
 static int rt_cache_seq_open(struct inode *inode, struct file *file)
 {
-   return seq_open_private(file, rt_cache_seq_ops,
+   return seq_open_net(inode, file, rt_cache_seq_ops,
sizeof(struct rt_cache_iter_state));
 }
 
@@ -407,7 +411,7 @@ static const struct file_operations rt_cache_seq_fops = {
.open= rt_cache_seq_open,
.read= seq_read,
.llseek  = seq_lseek,
-   .release = seq_release_private,
+   .release = seq_release_net,
 };
 
 
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 13/17 net-2.6.26] [NETNS]: Register /proc/net/rt_cache for each namespace.

2008-02-18 Thread Denis V. Lunev
Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/route.c |   24 +---
 1 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c11e6bf..5f67eba 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -545,7 +545,7 @@ static int ip_rt_acct_read(char *buffer, char **start, 
off_t offset,
 }
 #endif
 
-static __init int ip_rt_proc_init(struct net *net)
+static int __net_init ip_rt_do_proc_init(struct net *net)
 {
struct proc_dir_entry *pde;
 
@@ -577,8 +577,26 @@ err2:
 err1:
return -ENOMEM;
 }
+
+static void __net_exit ip_rt_do_proc_exit(struct net *net)
+{
+   remove_proc_entry(rt_cache, net-proc_net_stat);
+   remove_proc_entry(rt_cache, net-proc_net);
+   remove_proc_entry(rt_acct, net-proc_net);
+}
+
+static struct pernet_operations ip_rt_proc_ops __net_initdata =  {
+   .init = ip_rt_do_proc_init,
+   .exit = ip_rt_do_proc_exit,
+};
+
+static int __init ip_rt_proc_init(void)
+{
+   return register_pernet_subsys(ip_rt_proc_ops);
+}
+
 #else
-static inline int ip_rt_proc_init(struct net *net)
+static inline int ip_rt_proc_init(void)
 {
return 0;
 }
@@ -3056,7 +3074,7 @@ int __init ip_rt_init(void)
ip_rt_secret_interval;
add_timer(rt_secret_timer);
 
-   if (ip_rt_proc_init(init_net))
+   if (ip_rt_proc_init())
printk(KERN_ERR Unable to create route proc files\n);
 #ifdef CONFIG_XFRM
xfrm_init();
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 6/17 net-2.6.26] [NETNS]: Default arp parameters lookup.

2008-02-18 Thread Denis V. Lunev
Default ARP parameters should be findable regardless of the context.
Required to make inetdev_event work.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/core/neighbour.c |4 +---
 1 files changed, 1 insertions(+), 3 deletions(-)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index c895ad4..45ed620 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1275,9 +1275,7 @@ static inline struct neigh_parms 
*lookup_neigh_params(struct neigh_table *tbl,
struct neigh_parms *p;
 
for (p = tbl-parms; p; p = p-next) {
-   if (p-net != net)
-   continue;
-   if ((p-dev  p-dev-ifindex == ifindex) ||
+   if ((p-dev  p-dev-ifindex == ifindex  p-net == net) ||
(!p-dev  !ifindex))
return p;
}
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 9/17 net-2.6.26] [NETNS]: DST cleanup routines should be called inside namespace.

2008-02-18 Thread Denis V. Lunev
A device inside a namespace can be brought up and down, so the active
routing cache should be cleaned up when the device is stopped.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/core/dst.c |3 ---
 1 files changed, 0 insertions(+), 3 deletions(-)

diff --git a/net/core/dst.c b/net/core/dst.c
index 7deef48..3a01a81 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -295,9 +295,6 @@ static int dst_dev_event(struct notifier_block *this, 
unsigned long event, void
struct net_device *dev = ptr;
struct dst_entry *dst, *last = NULL;
 
-   if (dev-nd_net != init_net)
-   return NOTIFY_DONE;
-
switch (event) {
case NETDEV_UNREGISTER:
case NETDEV_DOWN:
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 11/17 net-2.6.26] [IPV4]: rt_cache_get_next should take rt_genid into account.

2008-02-18 Thread Denis V. Lunev
Otherwise /proc/net/rt_cache will look inconsistent with respect to
genid.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
Acked-by: Alexey Kuznetsov [EMAIL PROTECTED]
---
 net/ipv4/route.c |   18 +-
 1 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 44708ab..67df872 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -294,7 +294,8 @@ static struct rtable *rt_cache_get_first(struct 
rt_cache_iter_state *st)
return r;
 }
 
-static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct 
rtable *r)
+static struct rtable *__rt_cache_get_next(struct rt_cache_iter_state *st,
+ struct rtable *r)
 {
r = r-u.dst.rt_next;
while (!r) {
@@ -307,16 +308,23 @@ static struct rtable *rt_cache_get_next(struct 
rt_cache_iter_state *st, struct r
return rcu_dereference(r);
 }
 
+static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st,
+   struct rtable *r)
+{
+   while ((r = __rt_cache_get_next(st, r)) != NULL) {
+   if (r-rt_genid == st-genid)
+   break;
+   }
+   return r;
+}
+
 static struct rtable *rt_cache_get_idx(struct rt_cache_iter_state *st, loff_t 
pos)
 {
struct rtable *r = rt_cache_get_first(st);
 
if (r)
-   while (pos  (r = rt_cache_get_next(st, r))) {
-   if (r-rt_genid != st-genid)
-   continue;
+   while (pos  (r = rt_cache_get_next(st, r)))
--pos;
-   }
return pos ? NULL : r;
 }
 
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 7/17 net-2.6.26] [NETNS]: Disable multicasting configuration inside non-initial namespace.

2008-02-18 Thread Denis V. Lunev
Do not call hooks from device notifiers, and disallow configuration from
the ioctl/netlink layer.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/igmp.c |   39 +++
 1 files changed, 39 insertions(+), 0 deletions(-)

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 732cd07..d3f34a7 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1198,6 +1198,9 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 
addr)
 
ASSERT_RTNL();
 
+   if (in_dev-dev-nd_net != init_net)
+   return;
+
for (im=in_dev-mc_list; im; im=im-next) {
if (im-multiaddr == addr) {
im-users++;
@@ -1277,6 +1280,9 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 
addr)
 
ASSERT_RTNL();
 
+   if (in_dev-dev-nd_net != init_net)
+   return;
+
for (ip=in_dev-mc_list; (i=*ip)!=NULL; ip=i-next) {
if (i-multiaddr==addr) {
if (--i-users == 0) {
@@ -1304,6 +1310,9 @@ void ip_mc_down(struct in_device *in_dev)
 
ASSERT_RTNL();
 
+   if (in_dev-dev-nd_net != init_net)
+   return;
+
for (i=in_dev-mc_list; i; i=i-next)
igmp_group_dropped(i);
 
@@ -1324,6 +1333,9 @@ void ip_mc_init_dev(struct in_device *in_dev)
 {
ASSERT_RTNL();
 
+   if (in_dev-dev-nd_net != init_net)
+   return;
+
in_dev-mc_tomb = NULL;
 #ifdef CONFIG_IP_MULTICAST
in_dev-mr_gq_running = 0;
@@ -1347,6 +1359,9 @@ void ip_mc_up(struct in_device *in_dev)
 
ASSERT_RTNL();
 
+   if (in_dev-dev-nd_net != init_net)
+   return;
+
ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
 
for (i=in_dev-mc_list; i; i=i-next)
@@ -1363,6 +1378,9 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
 
ASSERT_RTNL();
 
+   if (in_dev-dev-nd_net != init_net)
+   return;
+
/* Deactivate timers */
ip_mc_down(in_dev);
 
@@ -1744,6 +1762,9 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn 
*imr)
if (!ipv4_is_multicast(addr))
return -EINVAL;
 
+   if (sk-sk_net != init_net)
+   return -EPROTONOSUPPORT;
+
rtnl_lock();
 
in_dev = ip_mc_find_dev(imr);
@@ -1812,6 +1833,9 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn 
*imr)
u32 ifindex;
int ret = -EADDRNOTAVAIL;
 
+   if (sk-sk_net != init_net)
+   return -EPROTONOSUPPORT;
+
rtnl_lock();
in_dev = ip_mc_find_dev(imr);
ifindex = imr-imr_ifindex;
@@ -1857,6 +1881,9 @@ int ip_mc_source(int add, int omode, struct sock *sk, 
struct
if (!ipv4_is_multicast(addr))
return -EINVAL;
 
+   if (sk-sk_net != init_net)
+   return -EPROTONOSUPPORT;
+
rtnl_lock();
 
imr.imr_multiaddr.s_addr = mreqs-imr_multiaddr;
@@ -1990,6 +2017,9 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter 
*msf, int ifindex)
msf-imsf_fmode != MCAST_EXCLUDE)
return -EINVAL;
 
+   if (sk-sk_net != init_net)
+   return -EPROTONOSUPPORT;
+
rtnl_lock();
 
imr.imr_multiaddr.s_addr = msf-imsf_multiaddr;
@@ -2070,6 +2100,9 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
if (!ipv4_is_multicast(addr))
return -EINVAL;
 
+   if (sk-sk_net != init_net)
+   return -EPROTONOSUPPORT;
+
rtnl_lock();
 
imr.imr_multiaddr.s_addr = msf-imsf_multiaddr;
@@ -2132,6 +2165,9 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter 
*gsf,
if (!ipv4_is_multicast(addr))
return -EINVAL;
 
+   if (sk-sk_net != init_net)
+   return -EPROTONOSUPPORT;
+
rtnl_lock();
 
err = -EADDRNOTAVAIL;
@@ -2216,6 +2252,9 @@ void ip_mc_drop_socket(struct sock *sk)
if (inet-mc_list == NULL)
return;
 
+   if (sk-sk_net != init_net)
+   return;
+
rtnl_lock();
while ((iml = inet-mc_list) != NULL) {
struct in_device *in_dev;
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 14/17 net-2.6.26] [NETNS]: Process devinet ioctl in the correct namespace.

2008-02-18 Thread Denis V. Lunev
Add namespace parameter to devinet_ioctl and locate device inside it for
state changes.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 include/linux/inetdevice.h |2 +-
 net/ipv4/af_inet.c |7 ---
 net/ipv4/devinet.c |6 +++---
 net/ipv4/ipconfig.c|2 +-
 4 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index fc4e3db..da05ab4 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -129,7 +129,7 @@ extern int unregister_inetaddr_notifier(struct 
notifier_block *nb);
 
 extern struct net_device *ip_dev_find(struct net *net, __be32 addr);
 extern int inet_addr_onlink(struct in_device *in_dev, __be32 a, 
__be32 b);
-extern int devinet_ioctl(unsigned int cmd, void __user *);
+extern int devinet_ioctl(struct net *net, unsigned int cmd, void 
__user *);
 extern voiddevinet_init(void);
 extern struct in_device*inetdev_by_index(struct net *, int);
 extern __be32  inet_select_addr(const struct net_device *dev, __be32 
dst, int scope);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 09ca529..c270080 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -784,6 +784,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, 
unsigned long arg)
 {
struct sock *sk = sock-sk;
int err = 0;
+   struct net *net = sk-sk_net;
 
switch (cmd) {
case SIOCGSTAMP:
@@ -795,12 +796,12 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, 
unsigned long arg)
case SIOCADDRT:
case SIOCDELRT:
case SIOCRTMSG:
-   err = ip_rt_ioctl(sk-sk_net, cmd, (void __user *)arg);
+   err = ip_rt_ioctl(net, cmd, (void __user *)arg);
break;
case SIOCDARP:
case SIOCGARP:
case SIOCSARP:
-   err = arp_ioctl(sk-sk_net, cmd, (void __user *)arg);
+   err = arp_ioctl(net, cmd, (void __user *)arg);
break;
case SIOCGIFADDR:
case SIOCSIFADDR:
@@ -813,7 +814,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, 
unsigned long arg)
case SIOCSIFPFLAGS:
case SIOCGIFPFLAGS:
case SIOCSIFFLAGS:
-   err = devinet_ioctl(cmd, (void __user *)arg);
+   err = devinet_ioctl(net, cmd, (void __user *)arg);
break;
default:
if (sk-sk_prot-ioctl)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 963e711..f7e78b7 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -595,7 +595,7 @@ static __inline__ int inet_abc_len(__be32 addr)
 }
 
 
-int devinet_ioctl(unsigned int cmd, void __user *arg)
+int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 {
struct ifreq ifr;
struct sockaddr_in sin_orig;
@@ -624,7 +624,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
*colon = 0;
 
 #ifdef CONFIG_KMOD
-   dev_load(init_net, ifr.ifr_name);
+   dev_load(net, ifr.ifr_name);
 #endif
 
switch (cmd) {
@@ -665,7 +665,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
rtnl_lock();
 
ret = -ENODEV;
-   if ((dev = __dev_get_by_name(init_net, ifr.ifr_name)) == NULL)
+   if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
goto done;
 
if (colon)
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index a52b585..009d78f 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -291,7 +291,7 @@ static int __init ic_dev_ioctl(unsigned int cmd, struct 
ifreq *arg)
 
mm_segment_t oldfs = get_fs();
set_fs(get_ds());
-   res = devinet_ioctl(cmd, (struct ifreq __user *) arg);
+   res = devinet_ioctl(init_net, cmd, (struct ifreq __user *) arg);
set_fs(oldfs);
return res;
 }
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/17 net-2.6.26] [NETNS]: Disable inetaddr notifiers in namespaces other than initial.

2008-02-18 Thread Denis V. Lunev
ip_fib_init is kept enabled. It is already namespace-aware.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 drivers/net/bonding/bond_main.c |3 +++
 drivers/net/via-velocity.c  |3 +++
 drivers/s390/net/qeth_main.c|3 +++
 net/sctp/protocol.c |3 +++
 4 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 0942d82..9666434 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3511,6 +3511,9 @@ static int bond_inetaddr_event(struct notifier_block 
*this, unsigned long event,
struct bonding *bond, *bond_next;
struct vlan_entry *vlan, *vlan_next;
 
+   if (ifa-ifa_dev-dev-nd_net != init_net)
+   return NOTIFY_DONE;
+
list_for_each_entry_safe(bond, bond_next, bond_dev_list, bond_list) {
if (bond-dev == event_dev) {
switch (event) {
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index c50fdee..1525e8a 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -3464,6 +3464,9 @@ static int velocity_netdev_event(struct notifier_block 
*nb, unsigned long notifi
struct velocity_info *vptr;
unsigned long flags;
 
+   if (dev-nd_net != init_net)
+   return NOTIFY_DONE;
+
spin_lock_irqsave(velocity_dev_list_lock, flags);
list_for_each_entry(vptr, velocity_dev_list, list) {
if (vptr-dev == dev) {
diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c
index 62606ce..d063e9e 100644
--- a/drivers/s390/net/qeth_main.c
+++ b/drivers/s390/net/qeth_main.c
@@ -8622,6 +8622,9 @@ qeth_ip_event(struct notifier_block *this,
struct qeth_ipaddr *addr;
struct qeth_card *card;
 
+   if (dev-nd_net != init_net)
+   return NOTIFY_DONE;
+
QETH_DBF_TEXT(trace,3,ipevent);
card = qeth_get_card_from_dev(dev);
if (!card)
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 22a1657..4475f7e 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -629,6 +629,9 @@ static int sctp_inetaddr_event(struct notifier_block *this, 
unsigned long ev,
struct sctp_sockaddr_entry *addr = NULL;
struct sctp_sockaddr_entry *temp;
 
+   if (ifa-ifa_dev-dev-nd_net != init_net)
+   return NOTIFY_DONE;
+
switch (ev) {
case NETDEV_UP:
addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC);
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 8/17 net-2.6.26] [NETNS]: Enable inetdev_event notifier.

2008-02-18 Thread Denis V. Lunev
After all these preparations it is time to enable the main IPv4 device
initialization routine inside a namespace. It is safe to do this now.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/devinet.c |3 ---
 1 files changed, 0 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index f282b26..963e711 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1044,9 +1044,6 @@ static int inetdev_event(struct notifier_block *this, 
unsigned long event,
struct net_device *dev = ptr;
struct in_device *in_dev = __in_dev_get_rtnl(dev);
 
-   if (dev-nd_net != init_net)
-   return NOTIFY_DONE;
-
ASSERT_RTNL();
 
if (!in_dev) {
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 16/17 net-2.6.26] [NETNS]: Enable IPv4 address manipulations inside namespace.

2008-02-18 Thread Denis V. Lunev
Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/devinet.c |9 -
 1 files changed, 0 insertions(+), 9 deletions(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index f7e78b7..aa23d10 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -446,9 +446,6 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct 
nlmsghdr *nlh, void *arg
 
ASSERT_RTNL();
 
-   if (net != init_net)
-   return -EINVAL;
-
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
if (err  0)
goto errout;
@@ -560,9 +557,6 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct 
nlmsghdr *nlh, void *arg
 
ASSERT_RTNL();
 
-   if (net != init_net)
-   return -EINVAL;
-
ifa = rtm_to_ifaddr(net, nlh);
if (IS_ERR(ifa))
return PTR_ERR(ifa);
@@ -1169,9 +1163,6 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct 
netlink_callback *cb)
struct in_ifaddr *ifa;
int s_ip_idx, s_idx = cb-args[0];
 
-   if (net != init_net)
-   return 0;
-
s_ip_idx = ip_idx = cb-args[1];
idx = 0;
for_each_netdev(net, dev) {
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 17/17 net-2.6.26] [NETNS]: Process inet_select_addr inside a namespace.

2008-02-18 Thread Denis V. Lunev
The context is available from a network device passed in.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/devinet.c |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index aa23d10..033670d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -871,6 +871,7 @@ __be32 inet_select_addr(const struct net_device *dev, 
__be32 dst, int scope)
 {
__be32 addr = 0;
struct in_device *in_dev;
+   struct net *net = dev-nd_net;
 
rcu_read_lock();
in_dev = __in_dev_get_rcu(dev);
@@ -899,7 +900,7 @@ no_in_dev:
 */
read_lock(dev_base_lock);
rcu_read_lock();
-   for_each_netdev(init_net, dev) {
+   for_each_netdev(net, dev) {
if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
continue;
 
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 15/17 net-2.6.26] [NETNS]: Enable all routing manipulation via netlink inside namespace.

2008-02-18 Thread Denis V. Lunev
Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/route.c |   16 
 1 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 5f67eba..79e2e8a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2702,9 +2702,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, 
struct nlmsghdr* nlh, void
int err;
struct sk_buff *skb;
 
-   if (net != init_net)
-   return -EINVAL;
-
err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
if (err  0)
goto errout;
@@ -2734,7 +2731,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, 
struct nlmsghdr* nlh, void
if (iif) {
struct net_device *dev;
 
-   dev = __dev_get_by_index(init_net, iif);
+   dev = __dev_get_by_index(net, iif);
if (dev == NULL) {
err = -ENODEV;
goto errout_free;
@@ -2760,7 +2757,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, 
struct nlmsghdr* nlh, void
},
.oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
};
-   err = ip_route_output_key(init_net, rt, fl);
+   err = ip_route_output_key(net, rt, fl);
}
 
if (err)
@@ -2771,11 +2768,11 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, 
struct nlmsghdr* nlh, void
rt-rt_flags |= RTCF_NOTIFY;
 
err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh-nlmsg_seq,
-   RTM_NEWROUTE, 0, 0);
+  RTM_NEWROUTE, 0, 0);
if (err = 0)
goto errout_free;
 
-   err = rtnl_unicast(skb, init_net, NETLINK_CB(in_skb).pid);
+   err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
 errout:
return err;
 
@@ -2789,6 +2786,9 @@ int ip_rt_dump(struct sk_buff *skb,  struct 
netlink_callback *cb)
struct rtable *rt;
int h, s_h;
int idx, s_idx;
+   struct net *net;
+
+   net = skb-sk-sk_net;
 
s_h = cb-args[0];
if (s_h  0)
@@ -2798,7 +2798,7 @@ int ip_rt_dump(struct sk_buff *skb,  struct 
netlink_callback *cb)
rcu_read_lock_bh();
for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt;
 rt = rcu_dereference(rt-u.dst.rt_next), idx++) {
-   if (idx  s_idx)
+   if (rt-u.dst.dev-nd_net != net || idx  s_idx)
continue;
if (rt-rt_genid != atomic_read(rt_genid))
continue;
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/17 net-2.6.26] [NETNS]: Register neighbour table parameters in the correct namespace.

2008-02-18 Thread Denis V. Lunev
neigh_sysctl_register should register sysctl entries inside the correct
namespace to avoid naming conflicts. A typical example is the loopback
device: entries for it are present in all namespaces.

Required to make inetdev_event work.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/core/neighbour.c |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 7bb6a9a..c895ad4 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2732,7 +2732,8 @@ int neigh_sysctl_register(struct net_device *dev, struct 
neigh_parms *p,
neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;
neigh_path[NEIGH_CTL_PATH_PROTO].ctl_name = p_id;
 
-   t-sysctl_header = register_sysctl_paths(neigh_path, t-neigh_vars);
+   t-sysctl_header =
+   register_net_sysctl_table(p-net, neigh_path, t-neigh_vars);
if (!t-sysctl_header)
goto free_procname;
 
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/17 net-2.6.26] [NETFILTER]: Consolidate masq_inet_event and masq_device_event.

2008-02-18 Thread Denis V. Lunev
They do exactly the same job.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/netfilter/ipt_MASQUERADE.c |   14 ++
 1 files changed, 2 insertions(+), 12 deletions(-)

diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c 
b/net/ipv4/netfilter/ipt_MASQUERADE.c
index d80fee8..313b3fc 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -139,18 +139,8 @@ static int masq_inet_event(struct notifier_block *this,
   unsigned long event,
   void *ptr)
 {
-   const struct net_device *dev = ((struct in_ifaddr *)ptr)-ifa_dev-dev;
-
-   if (event == NETDEV_DOWN) {
-   /* IP address was deleted.  Search entire table for
-  conntracks which were associated with that device,
-  and forget them. */
-   NF_CT_ASSERT(dev-ifindex != 0);
-
-   nf_ct_iterate_cleanup(device_cmp, (void *)(long)dev-ifindex);
-   }
-
-   return NOTIFY_DONE;
+   struct net_device *dev = ((struct in_ifaddr *)ptr)-ifa_dev-dev;
+   return masq_device_event(this, event, dev);
 }
 
 static struct notifier_block masq_dev_notifier = {
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/17 net-2.6.26] [IPV4]: Remove check for ifa-ifa_dev != NULL.

2008-02-18 Thread Denis V. Lunev
This is a callback registered to inet address notifier chain.
The check is useless as:
- ifa-ifa_dev is always != NULL
- similar checks are absent in all other notifiers.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/atm/clip.c |4 
 1 files changed, 0 insertions(+), 4 deletions(-)

diff --git a/net/atm/clip.c b/net/atm/clip.c
index 86b885e..dd96440 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -648,10 +648,6 @@ static int clip_inet_event(struct notifier_block *this, 
unsigned long event,
struct in_device *in_dev;
 
in_dev = ((struct in_ifaddr *)ifa)-ifa_dev;
-   if (!in_dev || !in_dev-dev) {
-   printk(KERN_WARNING clip_inet_event: no device\n);
-   return NOTIFY_DONE;
-   }
/*
 * Transitions are of the down-change-up type, so it's sufficient to
 * handle the change on up.
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/17 net-2.6.26] [IPV4]: Remove ifa != NULL check.

2008-02-18 Thread Denis V. Lunev
This is a callback registered on the inet address notifier chain.
The check is useless as:
- ifa is always != NULL
- similar checks are absent in all other notifiers.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 drivers/net/via-velocity.c |   22 ++
 1 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index cc0addb..c50fdee 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -3460,21 +3460,19 @@ static int velocity_resume(struct pci_dev *pdev)
 static int velocity_netdev_event(struct notifier_block *nb, unsigned long 
notification, void *ptr)
 {
struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
+   struct net_device *dev = ifa-ifa_dev-dev;
+   struct velocity_info *vptr;
+   unsigned long flags;
 
-   if (ifa) {
-   struct net_device *dev = ifa-ifa_dev-dev;
-   struct velocity_info *vptr;
-   unsigned long flags;
-
-   spin_lock_irqsave(velocity_dev_list_lock, flags);
-   list_for_each_entry(vptr, velocity_dev_list, list) {
-   if (vptr-dev == dev) {
-   velocity_get_ip(vptr);
-   break;
-   }
+   spin_lock_irqsave(velocity_dev_list_lock, flags);
+   list_for_each_entry(vptr, velocity_dev_list, list) {
+   if (vptr-dev == dev) {
+   velocity_get_ip(vptr);
+   break;
}
-   spin_unlock_irqrestore(velocity_dev_list_lock, flags);
}
+   spin_unlock_irqrestore(velocity_dev_list_lock, flags);
+
return NOTIFY_DONE;
 }
 
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/17] Finish IPv4 infrastructure namespacing.

2008-02-18 Thread Denis V. Lunev
This set finally makes it possible to manipulate network devices inside a
namespace and to configure them [via netlink]. 'route' is not yet
supported (though it is prepared for), as it requires a socket.

Additionally, better routing cache support is added.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Network namespace and tc?

2008-02-14 Thread Denis V. Lunev
Hello, Stephen!

Namespaces are not fully implemented yet :) Right now we have only the
basic infrastructure in mainline and, currently, we can't even run
TCP in a different namespace :( We hope this will change very soon.

These marks (net != &init_net) are used to
- mark places we need to modify
- ensure that we do not break the initial namespace.

Regards,
Den

On Wed, 2008-02-13 at 15:59 -0800, Stephen Hemminger wrote:
 It looks like tc filter won't work on alternate namespaces:
 /* Add/change/delete/get a filter node */
 
 static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 {
 ...
 
   if (net != init_net)
   return -EINVAL;
 
 
 Haven't played with namespace virtualization yet, but what else is
 not supported?  Where is this documented?
 --
 To unsubscribe from this list: send the line unsubscribe netdev in
 the body of a message to [EMAIL PROTECTED]
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] [IPV4]: Remove warning in node_set_parent.

2008-02-12 Thread Denis V. Lunev
ugly :), but

Acked-by: Denis V. Lunev [EMAIL PROTECTED]

On Mon, 2008-02-11 at 11:48 -0800, Stephen Hemminger wrote:
 On Mon, 11 Feb 2008 11:47:17 +0300
 Denis V. Lunev [EMAIL PROTECTED] wrote:
 
  net/ipv4/fib_trie.c: In function 'node_set_parent':
  net/ipv4/fib_trie.c:184: warning: assignment makes integer from pointer
  without a cast
  
  Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
  ---
   net/ipv4/fib_trie.c |7 ---
   1 files changed, 4 insertions(+), 3 deletions(-)
  
  diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
  index f5fba3f..1753cd4 100644
  --- a/net/ipv4/fib_trie.c
  +++ b/net/ipv4/fib_trie.c
  @@ -177,10 +177,11 @@ static inline struct tnode *node_parent_rcu(struct 
  node *node)
  return rcu_dereference(ret);
   }
   
  -static inline void node_set_parent(struct node *node, struct tnode *ptr)
  +static inline void node_set_parent(struct node *node, struct tnode *__ptr)
   {
  -   rcu_assign_pointer(node-parent,
  -  (unsigned long)ptr | NODE_TYPE(node));
  +   struct node *ptr;
  +   ptr = (struct node *)((unsigned long)__ptr | NODE_TYPE(node));
  +   rcu_assign_pointer(node-parent, ptr);
   }
   
   static inline struct node *tnode_get_child(struct tnode *tn, unsigned int 
  i)
 
 No, this causes a new warning from assigning a pointer (ptr) to the integer 
 node->parent.
 
 Why not just change rcupdate.h to do the right thing.
 
 From a00f7cbf1c2f2282eced236e1e8b99b0fecd213a Mon Sep 17 00:00:00 2001
 From: Stephen Hemminger [EMAIL PROTECTED]
 Date: Mon, 11 Feb 2008 11:28:13 -0800
 Subject: [PATCH] eliminate warnings when rcu_assign_pointer is used with 
 unsigned long
 
 It is reasonable to use RCU with non-pointer values, and describe
 the optimization.
 
 Signed-off-by: Stephen Hemminger [EMAIL PROTECTED]
 ---
  include/linux/rcupdate.h |   13 +++--
  1 files changed, 7 insertions(+), 6 deletions(-)
 
 diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
 index 37a642c..c44ac87 100644
 --- a/include/linux/rcupdate.h
 +++ b/include/linux/rcupdate.h
 @@ -172,14 +172,15 @@ struct rcu_head {
   * structure after the pointer assignment.  More importantly, this
   * call documents which pointers will be dereferenced by RCU read-side
   * code.
 + *
 + * If value is the NULL (constant 0), then no barrier is needed.
   */
  
 -#define rcu_assign_pointer(p, v) \
 - ({ \
 - if (!__builtin_constant_p(v) || \
 - ((v) != NULL)) \
 - smp_wmb(); \
 - (p) = (v); \
 +#define rcu_assign_pointer(p, v) \
 + ({  \
 + if (!(__builtin_constant_p(v)  v))\
 + smp_wmb();  \
 + (p) = (v);  \
   })
  
  /**
 -- 
 1.5.3.8
 
 
 
 

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] [IPV4]: Remove warning in node_set_parent.

2008-02-11 Thread Denis V. Lunev
net/ipv4/fib_trie.c: In function 'node_set_parent':
net/ipv4/fib_trie.c:184: warning: assignment makes integer from pointer
without a cast

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/fib_trie.c |7 ---
 1 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index f5fba3f..1753cd4 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -177,10 +177,11 @@ static inline struct tnode *node_parent_rcu(struct node 
*node)
return rcu_dereference(ret);
 }
 
-static inline void node_set_parent(struct node *node, struct tnode *ptr)
+static inline void node_set_parent(struct node *node, struct tnode *__ptr)
 {
-   rcu_assign_pointer(node-parent,
-  (unsigned long)ptr | NODE_TYPE(node));
+   struct node *ptr;
+   ptr = (struct node *)((unsigned long)__ptr | NODE_TYPE(node));
+   rcu_assign_pointer(node-parent, ptr);
 }
 
 static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i)
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: cgroup: limit network bandwidth

2008-02-08 Thread Denis V. Lunev
Hello, Andrea!

I happened to see your patch on LWN (I missed it on netdev@) and
have a few words to say about it. Maybe it is not too late. I missed my
entire mailbox yesterday and have not followed the discussion. Please
forgive me.

Rate-limiting message receive is not a good idea at all. First, if we talk
about i386, the most important resource is low memory. There is no more
than 1 GB of it. You suggest keeping it in use for longer than usual, and
this will not reduce network traffic to the node in the UDP case.

For TCP the situation is slightly better, but not by much. In the
case of a rather slow group with heavy traffic you will just eat 64 KB *
Nsockets of receive buffers.

So, resource usage is simply increased in this case. This is unfortunate. To
do proper rate-limiting you need to account for the memory used:
- drop incoming packets early for UDP
- manage the TCP window based on the buffer memory used by the cgroup

Regards,
Den

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] [NETNS] Remove unused member (dst_net) of dst_ops.

2008-02-08 Thread Denis V. Lunev
This was added by Daniel Lezcano [EMAIL PROTECTED] in
commit d4fa26ff44e31c2636a985e3092e2cd55d8045de. It looks to me like
preparatory stuff for IPv6 namespacing.

I think this is not needed in 2.6.25 but will be required in 2.6.26
very soon.

Regards,
Den

On Fri, 2008-02-08 at 13:24 +0200, Rami Rosen wrote:
 Hi,
   This patches removes dst_net member (a pointer to struct net)
   of dst_ops (/include/net/dst.h).
   
   Current network namespace implementation does not use it at all.
 
 Denis - any comments ?
   
 
 Regards,
 Rami Rosen
 
 
 Signed-off-by: Rami Rosen [EMAIL PROTECTED]

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] [IPV6]: dst_entry leak in ip4ip6_err.

2008-02-08 Thread Denis V. Lunev
The result of ip_route_output is not assigned to the skb. This means that
- it is leaked
- an OOPS is possible below when dereferencing skb->dst
- no ICMP message is sent for this case

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv6/ip6_tunnel.c |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 9031e52..cd94064 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -550,6 +550,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
ip_rt_put(rt);
goto out;
}
+   skb2-dst = (struct dst_entry *)rt;
} else {
ip_rt_put(rt);
if (ip_route_input(skb2, eiph-daddr, eiph-saddr, eiph-tos,
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] [IGMP]: Optimize kfree_skb in igmp_rcv.

2008-02-08 Thread Denis V. Lunev
Merge error paths inside igmp_rcv.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/igmp.c |   13 ++---
 1 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index fe2e6cd..d3f34a7 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -922,13 +922,11 @@ int igmp_rcv(struct sk_buff *skb)
struct in_device *in_dev = in_dev_get(skb-dev);
int len = skb-len;
 
-   if (in_dev==NULL) {
-   kfree_skb(skb);
-   return 0;
-   }
+   if (in_dev == NULL)
+   goto drop;
 
if (!pskb_may_pull(skb, sizeof(struct igmphdr)))
-   goto drop;
+   goto drop_ref;
 
switch (skb-ip_summed) {
case CHECKSUM_COMPLETE:
@@ -938,7 +936,7 @@ int igmp_rcv(struct sk_buff *skb)
case CHECKSUM_NONE:
skb-csum = 0;
if (__skb_checksum_complete(skb))
-   goto drop;
+   goto drop_ref;
}
 
ih = igmp_hdr(skb);
@@ -972,8 +970,9 @@ int igmp_rcv(struct sk_buff *skb)
break;
}
 
-drop:
+drop_ref:
in_dev_put(in_dev);
+drop:
kfree_skb(skb);
return 0;
 }
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 17/17] [NETNS]: Process inet_select_addr inside a namespace.

2008-02-06 Thread Denis V. Lunev
The context is available from a network device passed in.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/devinet.c |4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index aa23d10..d06a4e6 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -871,12 +871,14 @@ __be32 inet_select_addr(const struct net_device *dev, 
__be32 dst, int scope)
 {
__be32 addr = 0;
struct in_device *in_dev;
+   struct net *net;
 
rcu_read_lock();
in_dev = __in_dev_get_rcu(dev);
if (!in_dev)
goto no_in_dev;
 
+   net = dev-nd_net;
for_primary_ifa(in_dev) {
if (ifa-ifa_scope  scope)
continue;
@@ -899,7 +901,7 @@ no_in_dev:
 */
read_lock(dev_base_lock);
rcu_read_lock();
-   for_each_netdev(init_net, dev) {
+   for_each_netdev(net, dev) {
if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
continue;
 
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 15/17] [NETNS]: Enable all routing manipulation via netlink inside namespace.

2008-02-06 Thread Denis V. Lunev
Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/route.c |   16 
 1 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8a31e33..92ff622 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2672,9 +2672,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, 
struct nlmsghdr* nlh, void
int err;
struct sk_buff *skb;
 
-   if (net != init_net)
-   return -EINVAL;
-
err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
if (err  0)
goto errout;
@@ -2704,7 +2701,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, 
struct nlmsghdr* nlh, void
if (iif) {
struct net_device *dev;
 
-   dev = __dev_get_by_index(init_net, iif);
+   dev = __dev_get_by_index(net, iif);
if (dev == NULL) {
err = -ENODEV;
goto errout_free;
@@ -2730,7 +2727,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, 
struct nlmsghdr* nlh, void
},
.oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
};
-   err = ip_route_output_key(init_net, rt, fl);
+   err = ip_route_output_key(net, rt, fl);
}
 
if (err)
@@ -2741,11 +2738,11 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, 
struct nlmsghdr* nlh, void
rt-rt_flags |= RTCF_NOTIFY;
 
err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh-nlmsg_seq,
-   RTM_NEWROUTE, 0, 0);
+  RTM_NEWROUTE, 0, 0);
if (err = 0)
goto errout_free;
 
-   err = rtnl_unicast(skb, init_net, NETLINK_CB(in_skb).pid);
+   err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
 errout:
return err;
 
@@ -2759,6 +2756,9 @@ int ip_rt_dump(struct sk_buff *skb,  struct 
netlink_callback *cb)
struct rtable *rt;
int h, s_h;
int idx, s_idx;
+   struct net *net;
+
+   net = skb-sk-sk_net;
 
s_h = cb-args[0];
if (s_h  0)
@@ -2768,7 +2768,7 @@ int ip_rt_dump(struct sk_buff *skb,  struct 
netlink_callback *cb)
rcu_read_lock_bh();
for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt;
 rt = rcu_dereference(rt-u.dst.rt_next), idx++) {
-   if (idx  s_idx)
+   if (rt-u.dst.dev-nd_net != net || idx  s_idx)
continue;
if (rt-rt_genid != atomic_read(rt_genid))
continue;
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 14/17] [NETNS]: Process devinet ioctl in the correct namespace.

2008-02-06 Thread Denis V. Lunev
Add a namespace parameter to devinet_ioctl and locate the device inside it
for state changes.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 include/linux/inetdevice.h |2 +-
 net/ipv4/af_inet.c |7 ---
 net/ipv4/devinet.c |6 +++---
 net/ipv4/ipconfig.c|2 +-
 4 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index fc4e3db..da05ab4 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -129,7 +129,7 @@ extern int unregister_inetaddr_notifier(struct 
notifier_block *nb);
 
 extern struct net_device *ip_dev_find(struct net *net, __be32 addr);
 extern int inet_addr_onlink(struct in_device *in_dev, __be32 a, 
__be32 b);
-extern int devinet_ioctl(unsigned int cmd, void __user *);
+extern int devinet_ioctl(struct net *net, unsigned int cmd, void 
__user *);
 extern voiddevinet_init(void);
 extern struct in_device*inetdev_by_index(struct net *, int);
 extern __be32  inet_select_addr(const struct net_device *dev, __be32 
dst, int scope);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 09ca529..c270080 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -784,6 +784,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, 
unsigned long arg)
 {
struct sock *sk = sock-sk;
int err = 0;
+   struct net *net = sk-sk_net;
 
switch (cmd) {
case SIOCGSTAMP:
@@ -795,12 +796,12 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, 
unsigned long arg)
case SIOCADDRT:
case SIOCDELRT:
case SIOCRTMSG:
-   err = ip_rt_ioctl(sk-sk_net, cmd, (void __user *)arg);
+   err = ip_rt_ioctl(net, cmd, (void __user *)arg);
break;
case SIOCDARP:
case SIOCGARP:
case SIOCSARP:
-   err = arp_ioctl(sk-sk_net, cmd, (void __user *)arg);
+   err = arp_ioctl(net, cmd, (void __user *)arg);
break;
case SIOCGIFADDR:
case SIOCSIFADDR:
@@ -813,7 +814,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, 
unsigned long arg)
case SIOCSIFPFLAGS:
case SIOCGIFPFLAGS:
case SIOCSIFFLAGS:
-   err = devinet_ioctl(cmd, (void __user *)arg);
+   err = devinet_ioctl(net, cmd, (void __user *)arg);
break;
default:
if (sk-sk_prot-ioctl)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index f282b26..a06fcae 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -595,7 +595,7 @@ static __inline__ int inet_abc_len(__be32 addr)
 }
 
 
-int devinet_ioctl(unsigned int cmd, void __user *arg)
+int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 {
struct ifreq ifr;
struct sockaddr_in sin_orig;
@@ -624,7 +624,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
*colon = 0;
 
 #ifdef CONFIG_KMOD
-   dev_load(init_net, ifr.ifr_name);
+   dev_load(net, ifr.ifr_name);
 #endif
 
switch (cmd) {
@@ -665,7 +665,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
rtnl_lock();
 
ret = -ENODEV;
-   if ((dev = __dev_get_by_name(init_net, ifr.ifr_name)) == NULL)
+   if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
goto done;
 
if (colon)
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index a52b585..009d78f 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -291,7 +291,7 @@ static int __init ic_dev_ioctl(unsigned int cmd, struct 
ifreq *arg)
 
mm_segment_t oldfs = get_fs();
set_fs(get_ds());
-   res = devinet_ioctl(cmd, (struct ifreq __user *) arg);
+   res = devinet_ioctl(init_net, cmd, (struct ifreq __user *) arg);
set_fs(oldfs);
return res;
 }
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 13/17] [NETNS]: Register /proc/net/rt_cache for each namespace.

2008-02-06 Thread Denis V. Lunev
Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/route.c |   24 +---
 1 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index cc002d8..84da794 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -545,7 +545,7 @@ static int ip_rt_acct_read(char *buffer, char **start, 
off_t offset,
 }
 #endif
 
-static __init int ip_rt_proc_init(struct net *net)
+static int __net_init ip_rt_do_proc_init(struct net *net)
 {
struct proc_dir_entry *pde;
 
@@ -577,8 +577,26 @@ err2:
 err1:
return -ENOMEM;
 }
+
+static void __net_exit ip_rt_do_proc_exit(struct net *net)
+{
+   remove_proc_entry(rt_cache, net-proc_net_stat);
+   remove_proc_entry(rt_cache, net-proc_net);
+   remove_proc_entry(rt_acct, net-proc_net);
+}
+
+static struct pernet_operations ip_rt_proc_ops __net_initdata =  {
+   .init = ip_rt_do_proc_init,
+   .exit = ip_rt_do_proc_exit,
+};
+
+static int __init ip_rt_proc_init(void)
+{
+   return register_pernet_subsys(ip_rt_proc_ops);
+}
+
 #else
-static inline int ip_rt_proc_init(struct net *net)
+static inline int ip_rt_proc_init(void)
 {
return 0;
 }
@@ -3056,7 +3074,7 @@ int __init ip_rt_init(void)
ip_rt_secret_interval;
add_timer(rt_secret_timer);
 
-   if (ip_rt_proc_init(init_net))
+   if (ip_rt_proc_init())
printk(KERN_ERR Unable to create route proc files\n);
 #ifdef CONFIG_XFRM
xfrm_init();
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 10/17] [NETNS]: Process ip_rt_redirect in the correct namespace.

2008-02-06 Thread Denis V. Lunev
Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/route.c |7 +--
 1 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8842ecb..8a31e33 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1132,10 +1132,12 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 
new_gw,
__be32  skeys[2] = { saddr, 0 };
int  ikeys[2] = { dev-ifindex, 0 };
struct netevent_redirect netevent;
+   struct net *net;
 
if (!in_dev)
return;
 
+   net = dev-nd_net;
if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev)
|| ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw)
|| ipv4_is_zeronet(new_gw))
@@ -1147,7 +1149,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 
new_gw,
if (IN_DEV_SEC_REDIRECTS(in_dev)  
ip_fib_check_default(new_gw, dev))
goto reject_redirect;
} else {
-   if (inet_addr_type(init_net, new_gw) != RTN_UNICAST)
+   if (inet_addr_type(net, new_gw) != RTN_UNICAST)
goto reject_redirect;
}
 
@@ -1165,7 +1167,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 
new_gw,
rth-fl.fl4_src != skeys[i] ||
rth-fl.oif != ikeys[k] ||
rth-fl.iif != 0 ||
-   rth-rt_genid != atomic_read(rt_genid)) {
+   rth-rt_genid != atomic_read(rt_genid) ||
+   rth-u.dst.dev-nd_net != net) {
rthp = rth-u.dst.rt_next;
continue;
}
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 8/17] [NETNS]: Enable inetdev_event notifier.

2008-02-06 Thread Denis V. Lunev
After all these preparations it is time to enable the main IPv4 device
initialization routine inside a namespace. It is safe to do this now.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/devinet.c |3 ---
 1 files changed, 0 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index a06fcae..f7e78b7 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1044,9 +1044,6 @@ static int inetdev_event(struct notifier_block *this, 
unsigned long event,
struct net_device *dev = ptr;
struct in_device *in_dev = __in_dev_get_rtnl(dev);
 
-   if (dev-nd_net != init_net)
-   return NOTIFY_DONE;
-
ASSERT_RTNL();
 
if (!in_dev) {
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/17] [NETNS]: Disable address notifiers in namespaces other than initial.

2008-02-06 Thread Denis V. Lunev
ip_fib_init is kept enabled. It is already namespace-aware.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 drivers/net/bonding/bond_main.c |3 +++
 drivers/net/via-velocity.c  |3 +++
 drivers/s390/net/qeth_main.c|3 +++
 net/sctp/protocol.c |3 +++
 4 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 0942d82..9666434 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3511,6 +3511,9 @@ static int bond_inetaddr_event(struct notifier_block 
*this, unsigned long event,
struct bonding *bond, *bond_next;
struct vlan_entry *vlan, *vlan_next;
 
+   if (ifa-ifa_dev-dev-nd_net != init_net)
+   return NOTIFY_DONE;
+
list_for_each_entry_safe(bond, bond_next, bond_dev_list, bond_list) {
if (bond-dev == event_dev) {
switch (event) {
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index 7ff4509..d659834 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -3464,6 +3464,9 @@ static int velocity_netdev_event(struct notifier_block 
*nb, unsigned long notifi
struct velocity_info *vptr;
unsigned long flags;
 
+   if (dev-nd_net != init_net)
+   return NOTIFY_DONE;
+
spin_lock_irqsave(velocity_dev_list_lock, flags);
list_for_each_entry(vptr, velocity_dev_list, list) {
if (vptr-dev == dev) {
diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c
index 62606ce..d063e9e 100644
--- a/drivers/s390/net/qeth_main.c
+++ b/drivers/s390/net/qeth_main.c
@@ -8622,6 +8622,9 @@ qeth_ip_event(struct notifier_block *this,
struct qeth_ipaddr *addr;
struct qeth_card *card;
 
+   if (dev-nd_net != init_net)
+   return NOTIFY_DONE;
+
QETH_DBF_TEXT(trace,3,ipevent);
card = qeth_get_card_from_dev(dev);
if (!card)
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 1339742..20f7e4a 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -629,6 +629,9 @@ static int sctp_inetaddr_event(struct notifier_block *this, 
unsigned long ev,
struct sctp_sockaddr_entry *addr = NULL;
struct sctp_sockaddr_entry *temp;
 
+   if (ifa-ifa_dev-dev-nd_net != init_net)
+   return NOTIFY_DONE;
+
switch (ev) {
case NETDEV_UP:
addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC);
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 6/17] [NETNS]: Default arp parameters lookup.

2008-02-06 Thread Denis V. Lunev
Default ARP parameters should be findable regardless of the context.
Required to make inetdev_event work.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/core/neighbour.c |4 +---
 1 files changed, 1 insertions(+), 3 deletions(-)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 1ed7b0a..ea44b8d 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1281,9 +1281,7 @@ static inline struct neigh_parms 
*lookup_neigh_params(struct neigh_table *tbl,
struct neigh_parms *p;
 
for (p = tbl-parms; p; p = p-next) {
-   if (p-net != net)
-   continue;
-   if ((p-dev  p-dev-ifindex == ifindex) ||
+   if ((p-dev  p-dev-ifindex == ifindex  p-net == net) ||
(!p-dev  !ifindex))
return p;
}
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/17] [NETNS]: Register neighbour parameters of the net device in the correct namespace.

2008-02-06 Thread Denis V. Lunev
neigh_sysctl_register should register sysctl entries inside the correct
namespace to avoid naming conflicts. A typical example is the loopback
device: entries for it are present in all namespaces.

Required to make inetdev_event work.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/core/neighbour.c |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index a16cf1e..1ed7b0a 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2738,7 +2738,8 @@ int neigh_sysctl_register(struct net_device *dev, struct 
neigh_parms *p,
neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;
neigh_path[NEIGH_CTL_PATH_PROTO].ctl_name = p_id;
 
-   t-sysctl_header = register_sysctl_paths(neigh_path, t-neigh_vars);
+   t-sysctl_header =
+   register_net_sysctl_table(p-net, neigh_path, t-neigh_vars);
if (!t-sysctl_header)
goto free_procname;
 
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/17] [IPV4]: Consolidate masq_inet_event and masq_device_event.

2008-02-06 Thread Denis V. Lunev
They do exactly the same job.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/netfilter/ipt_MASQUERADE.c |   14 ++
 1 files changed, 2 insertions(+), 12 deletions(-)

diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c 
b/net/ipv4/netfilter/ipt_MASQUERADE.c
index d80fee8..313b3fc 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -139,18 +139,8 @@ static int masq_inet_event(struct notifier_block *this,
   unsigned long event,
   void *ptr)
 {
-   const struct net_device *dev = ((struct in_ifaddr *)ptr)-ifa_dev-dev;
-
-   if (event == NETDEV_DOWN) {
-   /* IP address was deleted.  Search entire table for
-  conntracks which were associated with that device,
-  and forget them. */
-   NF_CT_ASSERT(dev-ifindex != 0);
-
-   nf_ct_iterate_cleanup(device_cmp, (void *)(long)dev-ifindex);
-   }
-
-   return NOTIFY_DONE;
+   struct net_device *dev = ((struct in_ifaddr *)ptr)-ifa_dev-dev;
+   return masq_device_event(this, event, dev);
 }
 
 static struct notifier_block masq_dev_notifier = {
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/17] [IPV4]: Remove ifa != NULL check.

2008-02-06 Thread Denis V. Lunev
This is a callback registered on the inet address notifier chains.
The check is useless as:
- ifa is always != NULL
- similar checks are absent in all other notifiers.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 drivers/net/via-velocity.c |   22 ++
 1 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index 8c9fb82..7ff4509 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -3460,21 +3460,19 @@ static int velocity_resume(struct pci_dev *pdev)
 static int velocity_netdev_event(struct notifier_block *nb, unsigned long 
notification, void *ptr)
 {
struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
+   struct net_device *dev = ifa-ifa_dev-dev;
+   struct velocity_info *vptr;
+   unsigned long flags;
 
-   if (ifa) {
-   struct net_device *dev = ifa-ifa_dev-dev;
-   struct velocity_info *vptr;
-   unsigned long flags;
-
-   spin_lock_irqsave(velocity_dev_list_lock, flags);
-   list_for_each_entry(vptr, velocity_dev_list, list) {
-   if (vptr-dev == dev) {
-   velocity_get_ip(vptr);
-   break;
-   }
+   spin_lock_irqsave(velocity_dev_list_lock, flags);
+   list_for_each_entry(vptr, velocity_dev_list, list) {
+   if (vptr-dev == dev) {
+   velocity_get_ip(vptr);
+   break;
}
-   spin_unlock_irqrestore(velocity_dev_list_lock, flags);
}
+   spin_unlock_irqrestore(velocity_dev_list_lock, flags);
+
return NOTIFY_DONE;
 }
 
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/17] Finish IPv4 infrastructure namespacing

2008-02-06 Thread Denis V. Lunev
This set finally makes it possible to manipulate network devices inside a
namespace and to configure them [via netlink]. 'route' is not yet
supported (though it is prepared for).

Additionally, better routing cache support is added.

By the way, working ICMP is behind a couple of patches after this set :)

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/17] [IPV4]: Remove check for ifa-ifa_dev != NULL.

2008-02-06 Thread Denis V. Lunev
This is a callback registered to inet address notifiers chains.
The check is useless as:
- ifa-ifa_dev is always != NULL
- similar checks are absent in all other notifiers.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/atm/clip.c |4 
 1 files changed, 0 insertions(+), 4 deletions(-)

diff --git a/net/atm/clip.c b/net/atm/clip.c
index 86b885e..dd96440 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -648,10 +648,6 @@ static int clip_inet_event(struct notifier_block *this, 
unsigned long event,
struct in_device *in_dev;
 
in_dev = ((struct in_ifaddr *)ifa)-ifa_dev;
-   if (!in_dev || !in_dev-dev) {
-   printk(KERN_WARNING clip_inet_event: no device\n);
-   return NOTIFY_DONE;
-   }
/*
 * Transitions are of the down-change-up type, so it's sufficient to
 * handle the change on up.
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 9/17] [NETNS]: DST cleanup routines should be called inside namespace.

2008-02-06 Thread Denis V. Lunev
A device inside a namespace can be brought up and taken down, so the active
routing cache should be cleaned up when the device is stopped.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/core/dst.c |3 ---
 1 files changed, 0 insertions(+), 3 deletions(-)

diff --git a/net/core/dst.c b/net/core/dst.c
index 7deef48..3a01a81 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -295,9 +295,6 @@ static int dst_dev_event(struct notifier_block *this, 
unsigned long event, void
struct net_device *dev = ptr;
struct dst_entry *dst, *last = NULL;
 
-   if (dev-nd_net != init_net)
-   return NOTIFY_DONE;
-
switch (event) {
case NETDEV_UNREGISTER:
case NETDEV_DOWN:
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 7/17] [NETNS]: Disable multicasting configuration inside namespace.

2008-02-06 Thread Denis V. Lunev
Do not call hooks from device notifiers, and disallow configuration from
the ioctl/netlink layer.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/igmp.c |   39 +++
 1 files changed, 39 insertions(+), 0 deletions(-)

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 994648b..fe2e6cd 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1199,6 +1199,9 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 
addr)
 
ASSERT_RTNL();
 
+   if (in_dev-dev-nd_net != init_net)
+   return;
+
for (im=in_dev-mc_list; im; im=im-next) {
if (im-multiaddr == addr) {
im-users++;
@@ -1278,6 +1281,9 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 
addr)
 
ASSERT_RTNL();
 
+   if (in_dev-dev-nd_net != init_net)
+   return;
+
for (ip=in_dev-mc_list; (i=*ip)!=NULL; ip=i-next) {
if (i-multiaddr==addr) {
if (--i-users == 0) {
@@ -1305,6 +1311,9 @@ void ip_mc_down(struct in_device *in_dev)
 
ASSERT_RTNL();
 
+   if (in_dev-dev-nd_net != init_net)
+   return;
+
for (i=in_dev-mc_list; i; i=i-next)
igmp_group_dropped(i);
 
@@ -1325,6 +1334,9 @@ void ip_mc_init_dev(struct in_device *in_dev)
 {
ASSERT_RTNL();
 
+   if (in_dev-dev-nd_net != init_net)
+   return;
+
in_dev-mc_tomb = NULL;
 #ifdef CONFIG_IP_MULTICAST
in_dev-mr_gq_running = 0;
@@ -1348,6 +1360,9 @@ void ip_mc_up(struct in_device *in_dev)
 
ASSERT_RTNL();
 
+   if (in_dev-dev-nd_net != init_net)
+   return;
+
ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
 
for (i=in_dev-mc_list; i; i=i-next)
@@ -1364,6 +1379,9 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
 
ASSERT_RTNL();
 
+   if (in_dev-dev-nd_net != init_net)
+   return;
+
/* Deactivate timers */
ip_mc_down(in_dev);
 
@@ -1745,6 +1763,9 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn 
*imr)
if (!ipv4_is_multicast(addr))
return -EINVAL;
 
+   if (sk-sk_net != init_net)
+   return -EPROTONOSUPPORT;
+
rtnl_lock();
 
in_dev = ip_mc_find_dev(imr);
@@ -1813,6 +1834,9 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn 
*imr)
u32 ifindex;
int ret = -EADDRNOTAVAIL;
 
+   if (sk-sk_net != init_net)
+   return -EPROTONOSUPPORT;
+
rtnl_lock();
in_dev = ip_mc_find_dev(imr);
ifindex = imr-imr_ifindex;
@@ -1858,6 +1882,9 @@ int ip_mc_source(int add, int omode, struct sock *sk, 
struct
if (!ipv4_is_multicast(addr))
return -EINVAL;
 
+   if (sk-sk_net != init_net)
+   return -EPROTONOSUPPORT;
+
rtnl_lock();
 
imr.imr_multiaddr.s_addr = mreqs-imr_multiaddr;
@@ -1991,6 +2018,9 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter 
*msf, int ifindex)
msf-imsf_fmode != MCAST_EXCLUDE)
return -EINVAL;
 
+   if (sk-sk_net != init_net)
+   return -EPROTONOSUPPORT;
+
rtnl_lock();
 
imr.imr_multiaddr.s_addr = msf-imsf_multiaddr;
@@ -2071,6 +2101,9 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
if (!ipv4_is_multicast(addr))
return -EINVAL;
 
+   if (sk-sk_net != init_net)
+   return -EPROTONOSUPPORT;
+
rtnl_lock();
 
imr.imr_multiaddr.s_addr = msf-imsf_multiaddr;
@@ -2133,6 +2166,9 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter 
*gsf,
if (!ipv4_is_multicast(addr))
return -EINVAL;
 
+   if (sk-sk_net != init_net)
+   return -EPROTONOSUPPORT;
+
rtnl_lock();
 
err = -EADDRNOTAVAIL;
@@ -2217,6 +2253,9 @@ void ip_mc_drop_socket(struct sock *sk)
if (inet-mc_list == NULL)
return;
 
+   if (sk-sk_net != init_net)
+   return;
+
rtnl_lock();
while ((iml = inet-mc_list) != NULL) {
struct in_device *in_dev;
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 11/17] [IPV4]: rt_cache_get_next should take rt_genid into account.

2008-02-06 Thread Denis V. Lunev
Otherwise /proc/net/rt_cache will look inconsistent with respect to
genid.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
Acked-by: Alexey Kuznetsov [EMAIL PROTECTED]
---
 net/ipv4/route.c |   18 +-
 1 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 92ff622..b03de57 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -294,7 +294,8 @@ static struct rtable *rt_cache_get_first(struct 
rt_cache_iter_state *st)
return r;
 }
 
-static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct 
rtable *r)
+static struct rtable *__rt_cache_get_next(struct rt_cache_iter_state *st,
+ struct rtable *r)
 {
r = r-u.dst.rt_next;
while (!r) {
@@ -307,16 +308,23 @@ static struct rtable *rt_cache_get_next(struct 
rt_cache_iter_state *st, struct r
return rcu_dereference(r);
 }
 
+static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st,
+   struct rtable *r)
+{
+   while ((r = __rt_cache_get_next(st, r)) != NULL) {
+   if (r-rt_genid == st-genid)
+   break;
+   }
+   return r;
+}
+
 static struct rtable *rt_cache_get_idx(struct rt_cache_iter_state *st, loff_t 
pos)
 {
struct rtable *r = rt_cache_get_first(st);
 
if (r)
-   while (pos  (r = rt_cache_get_next(st, r))) {
-   if (r-rt_genid != st-genid)
-   continue;
+   while (pos  (r = rt_cache_get_next(st, r)))
--pos;
-   }
return pos ? NULL : r;
 }
 
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 12/17] [NETNS]: Process /proc/net/rt_cache inside a namespace.

2008-02-06 Thread Denis V. Lunev
Show routing cache for a particular namespace only.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/route.c |   10 +++---
 1 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b03de57..cc002d8 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -273,6 +273,7 @@ static unsigned int rt_hash_code(u32 daddr, u32 saddr)
 
 #ifdef CONFIG_PROC_FS
 struct rt_cache_iter_state {
+   struct seq_net_private p;
int bucket;
int genid;
 };
@@ -285,7 +286,8 @@ static struct rtable *rt_cache_get_first(struct 
rt_cache_iter_state *st)
rcu_read_lock_bh();
r = rcu_dereference(rt_hash_table[st-bucket].chain);
while (r) {
-   if (r-rt_genid == st-genid)
+   if (r-u.dst.dev-nd_net == st-p.net 
+   r-rt_genid == st-genid)
return r;
r = rcu_dereference(r-u.dst.rt_next);
}
@@ -312,6 +314,8 @@ static struct rtable *rt_cache_get_next(struct 
rt_cache_iter_state *st,
struct rtable *r)
 {
while ((r = __rt_cache_get_next(st, r)) != NULL) {
+   if (r-u.dst.dev-nd_net != st-p.net)
+   continue;
if (r-rt_genid == st-genid)
break;
}
@@ -398,7 +402,7 @@ static const struct seq_operations rt_cache_seq_ops = {
 
 static int rt_cache_seq_open(struct inode *inode, struct file *file)
 {
-   return seq_open_private(file, rt_cache_seq_ops,
+   return seq_open_net(inode, file, rt_cache_seq_ops,
sizeof(struct rt_cache_iter_state));
 }
 
@@ -407,7 +411,7 @@ static const struct file_operations rt_cache_seq_fops = {
.open= rt_cache_seq_open,
.read= seq_read,
.llseek  = seq_lseek,
-   .release = seq_release_private,
+   .release = seq_release_net,
 };
 
 
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 16/17] [NETNS]: Enable IPv4 address manipulations inside namespace.

2008-02-06 Thread Denis V. Lunev
Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/devinet.c |9 -
 1 files changed, 0 insertions(+), 9 deletions(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index f7e78b7..aa23d10 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -446,9 +446,6 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct 
nlmsghdr *nlh, void *arg
 
ASSERT_RTNL();
 
-   if (net != init_net)
-   return -EINVAL;
-
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
if (err  0)
goto errout;
@@ -560,9 +557,6 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct 
nlmsghdr *nlh, void *arg
 
ASSERT_RTNL();
 
-   if (net != init_net)
-   return -EINVAL;
-
ifa = rtm_to_ifaddr(net, nlh);
if (IS_ERR(ifa))
return PTR_ERR(ifa);
@@ -1169,9 +1163,6 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct 
netlink_callback *cb)
struct in_ifaddr *ifa;
int s_ip_idx, s_idx = cb-args[0];
 
-   if (net != init_net)
-   return 0;
-
s_ip_idx = ip_idx = cb-args[1];
idx = 0;
for_each_netdev(net, dev) {
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/17] Finish IPv4 infrastructure namespacing

2008-02-06 Thread Denis V. Lunev
David Miller wrote:
 What part of no new features did you not understand?

OOPS, again :( sorry, I missed that thread
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] [IPV4]: Formatting fix for /proc/net/fib_trie.

2008-02-05 Thread Denis V. Lunev
The line in the /proc/net/fib_trie for route with TOS specified
- has extra \n at the end
- does not have a space after route scope
like below.
   |-- 1.1.1.1
  /32 universe UNICASTtos =1

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/fib_trie.c |3 +--
 1 files changed, 1 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 35851c9..f5fba3f 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2431,8 +2431,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void 
*v)
   rtn_type(buf2, sizeof(buf2),
fa-fa_type));
if (fa-fa_tos)
-   seq_printf(seq, tos =%d\n,
-  fa-fa_tos);
+   seq_printf(seq,  tos=%d, fa-fa_tos);
seq_putc(seq, '\n');
}
}
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] [IPV4]: Fix compiler error with CONFIG_PROC_FS=n

2008-02-05 Thread Denis V. Lunev
Johann Felix Soden wrote:
 From: Johann Felix Soden [EMAIL PROTECTED]
 
 Handle CONFIG_PROC_FS=n in net/ipv4/fib_frontend.c because:
 
 net/ipv4/fib_frontend.c: In function 'fib_net_init':
 net/ipv4/fib_frontend.c:1032: error: implicit declaration of function 
 'fib_proc_init'
 net/ipv4/fib_frontend.c: In function 'fib_net_exit':
 net/ipv4/fib_frontend.c:1047: error: implicit declaration of function 
 'fib_proc_exit'
 
 Signed-off-by: Johann Felix Soden [EMAIL PROTECTED]

The fix from Li Zefan is already committed.
(cc8274f50f2ad9a97a837451f63a0a3e65f7f490)
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/3] [RAW]: proc output cleanups.

2008-01-31 Thread Denis V. Lunev
Yesterday Adrian Bunk noticed that the commit

commit 42a73808ed4f30b739eb52bcbb33a02fe62ceef5
Author: Pavel Emelyanov [EMAIL PROTECTED]
Date:   Mon Nov 19 22:38:33 2007 -0800

is somewhat strange. Basically, the commit is obviously wrong as the
content of the /proc/net/raw6 is incorrect due to it.

This series of patches fixes the original problem and slightly cleans up the
surrounding code.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/3] [RAW]: Cleanup IPv4 raw_seq_show.

2008-01-31 Thread Denis V. Lunev
There is no need to use 128 bytes on the stack at all. Clean the code in
the IPv6 style.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/raw.c |   24 +++-
 1 files changed, 7 insertions(+), 17 deletions(-)

diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 507cbfe..830f19e 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -927,7 +927,7 @@ void raw_seq_stop(struct seq_file *seq, void *v)
 }
 EXPORT_SYMBOL_GPL(raw_seq_stop);
 
-static __inline__ char *get_raw_sock(struct sock *sp, char *tmpbuf, int i)
+static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
 {
struct inet_sock *inet = inet_sk(sp);
__be32 dest = inet-daddr,
@@ -935,33 +935,23 @@ static __inline__ char *get_raw_sock(struct sock *sp, 
char *tmpbuf, int i)
__u16 destp = 0,
  srcp  = inet-num;
 
-   sprintf(tmpbuf, %4d: %08X:%04X %08X:%04X
+   seq_printf(seq, %4d: %08X:%04X %08X:%04X
 %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d,
i, src, srcp, dest, destp, sp-sk_state,
atomic_read(sp-sk_wmem_alloc),
atomic_read(sp-sk_rmem_alloc),
0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
atomic_read(sp-sk_refcnt), sp, atomic_read(sp-sk_drops));
-   return tmpbuf;
 }
 
-#define TMPSZ 128
-
 static int raw_seq_show(struct seq_file *seq, void *v)
 {
-   char tmpbuf[TMPSZ+1];
-
if (v == SEQ_START_TOKEN)
-   seq_printf(seq, %-*s\n, TMPSZ-1,
-sl  local_address rem_address   st tx_queue 
-  rx_queue tr tm-when retrnsmt   uid  timeout 
-  inode  drops);
-   else {
-   struct raw_iter_state *state = raw_seq_private(seq);
-
-   seq_printf(seq, %-*s\n, TMPSZ-1,
-  get_raw_sock(v, tmpbuf, state-bucket));
-   }
+   seq_printf(seq,   sl  local_address rem_address   st tx_queue 
+   rx_queue tr tm-when retrnsmt   uid  timeout 
+   inode  drops\n);
+   else
+   raw_sock_seq_show(seq, v, raw_seq_private(seq)-bucket);
return 0;
 }
 
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/3] [RAW]: Wrong content of the /proc/net/raw6.

2008-01-31 Thread Denis V. Lunev
The address of IPv6 raw sockets was shown in the wrong format (that of IPv4 sockets).
The problem has been introduced by the
commit 42a73808ed4f30b739eb52bcbb33a02fe62ceef5
Author: Pavel Emelyanov [EMAIL PROTECTED]
Date:   Mon Nov 19 22:38:33 2007 -0800

Thanks to Adrian Bunk who originally noticed the problem.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 include/net/raw.h |3 ++-
 net/ipv4/raw.c|8 
 net/ipv6/raw.c|2 +-
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/include/net/raw.h b/include/net/raw.h
index c7ea7a2..1828f81 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -48,7 +48,8 @@ struct raw_iter_state {
 void *raw_seq_start(struct seq_file *seq, loff_t *pos);
 void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos);
 void raw_seq_stop(struct seq_file *seq, void *v);
-int raw_seq_open(struct inode *ino, struct file *file, struct raw_hashinfo *h);
+int raw_seq_open(struct inode *ino, struct file *file,
+struct raw_hashinfo *h, const struct seq_operations *ops);
 
 #endif
 
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 830f19e..a3002fe 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -962,13 +962,13 @@ static const struct seq_operations raw_seq_ops = {
.show  = raw_seq_show,
 };
 
-int raw_seq_open(struct inode *ino, struct file *file, struct raw_hashinfo *h)
+int raw_seq_open(struct inode *ino, struct file *file,
+struct raw_hashinfo *h, const struct seq_operations *ops)
 {
int err;
struct raw_iter_state *i;
 
-   err = seq_open_net(ino, file, raw_seq_ops,
-   sizeof(struct raw_iter_state));
+   err = seq_open_net(ino, file, ops, sizeof(struct raw_iter_state));
if (err  0)
return err;
 
@@ -980,7 +980,7 @@ EXPORT_SYMBOL_GPL(raw_seq_open);
 
 static int raw_v4_seq_open(struct inode *inode, struct file *file)
 {
-   return raw_seq_open(inode, file, raw_v4_hashinfo);
+   return raw_seq_open(inode, file, raw_v4_hashinfo, raw_seq_ops);
 }
 
 static const struct file_operations raw_seq_fops = {
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index a2cf499..8897ccf 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1262,7 +1262,7 @@ static const struct seq_operations raw6_seq_ops = {
 
 static int raw6_seq_open(struct inode *inode, struct file *file)
 {
-   return raw_seq_open(inode, file, raw_v6_hashinfo);
+   return raw_seq_open(inode, file, raw_v6_hashinfo, raw6_seq_ops);
 }
 
 static const struct file_operations raw6_seq_fops = {
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/3] [RAW]: Family check in the /proc/net/raw[6] is extra.

2008-01-31 Thread Denis V. Lunev
Different hashtables are used for IPv6 and IPv4 raw sockets, so no need to
check the socket family in the iterator over hashtables. Clean this out.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 include/net/raw.h |4 +---
 net/ipv4/raw.c|   12 
 net/ipv6/raw.c|2 +-
 3 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/include/net/raw.h b/include/net/raw.h
index cca81d8..c7ea7a2 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -41,7 +41,6 @@ extern void raw_proc_exit(void);
 struct raw_iter_state {
struct seq_net_private p;
int bucket;
-   unsigned short family;
struct raw_hashinfo *h;
 };
 
@@ -49,8 +48,7 @@ struct raw_iter_state {
 void *raw_seq_start(struct seq_file *seq, loff_t *pos);
 void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos);
 void raw_seq_stop(struct seq_file *seq, void *v);
-int raw_seq_open(struct inode *ino, struct file *file, struct raw_hashinfo *h,
-   unsigned short family);
+int raw_seq_open(struct inode *ino, struct file *file, struct raw_hashinfo *h);
 
 #endif
 
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index f863c3d..507cbfe 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -862,8 +862,7 @@ static struct sock *raw_get_first(struct seq_file *seq)
struct hlist_node *node;
 
sk_for_each(sk, node, state-h-ht[state-bucket])
-   if (sk-sk_net == state-p.net 
-   sk-sk_family == state-family)
+   if (sk-sk_net == state-p.net)
goto found;
}
sk = NULL;
@@ -879,8 +878,7 @@ static struct sock *raw_get_next(struct seq_file *seq, 
struct sock *sk)
sk = sk_next(sk);
 try_again:
;
-   } while (sk  sk-sk_net != state-p.net 
-   sk-sk_family != state-family);
+   } while (sk  sk-sk_net != state-p.net);
 
if (!sk  ++state-bucket  RAW_HTABLE_SIZE) {
sk = sk_head(state-h-ht[state-bucket]);
@@ -974,8 +972,7 @@ static const struct seq_operations raw_seq_ops = {
.show  = raw_seq_show,
 };
 
-int raw_seq_open(struct inode *ino, struct file *file, struct raw_hashinfo *h,
-   unsigned short family)
+int raw_seq_open(struct inode *ino, struct file *file, struct raw_hashinfo *h)
 {
int err;
struct raw_iter_state *i;
@@ -987,14 +984,13 @@ int raw_seq_open(struct inode *ino, struct file *file, 
struct raw_hashinfo *h,
 
i = raw_seq_private((struct seq_file *)file-private_data);
i-h = h;
-   i-family = family;
return 0;
 }
 EXPORT_SYMBOL_GPL(raw_seq_open);
 
 static int raw_v4_seq_open(struct inode *inode, struct file *file)
 {
-   return raw_seq_open(inode, file, raw_v4_hashinfo, PF_INET);
+   return raw_seq_open(inode, file, raw_v4_hashinfo);
 }
 
 static const struct file_operations raw_seq_fops = {
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index d61c63d..a2cf499 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1262,7 +1262,7 @@ static const struct seq_operations raw6_seq_ops = {
 
 static int raw6_seq_open(struct inode *inode, struct file *file)
 {
-   return raw_seq_open(inode, file, raw_v6_hashinfo, PF_INET6);
+   return raw_seq_open(inode, file, raw_v6_hashinfo);
 }
 
 static const struct file_operations raw6_seq_fops = {
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/6] preparations to enable netdevice notifiers inside a namespace (resend)

2008-01-31 Thread Denis V. Lunev
Here are some preparations and cleanups to enable network device/inet
address notifiers inside a namespace.

This set of patches has been originally sent last Friday. One cleanup
patch from the original series is dropped as wrong, thanks to Daniel
Lezcano.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/6] [IPV4]: Fix memory leak on error path during FIB initialization.

2008-01-31 Thread Denis V. Lunev
net-ipv4.fib_table_hash is not freed when fib4_rules_init failed. The problem
has been introduced by the following commit.
commit c8050bf6d84785a7edd2e81591e8f833231477e8
Author: Denis V. Lunev [EMAIL PROTECTED]
Date:   Thu Jan 10 03:28:24 2008 -0800

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/fib_frontend.c |   10 +-
 1 files changed, 9 insertions(+), 1 deletions(-)

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index d282618..d0507f4 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -975,6 +975,7 @@ static struct notifier_block fib_netdev_notifier = {
 
 static int __net_init ip_fib_net_init(struct net *net)
 {
+   int err;
unsigned int i;
 
net-ipv4.fib_table_hash = kzalloc(
@@ -985,7 +986,14 @@ static int __net_init ip_fib_net_init(struct net *net)
for (i = 0; i  FIB_TABLE_HASHSZ; i++)
INIT_HLIST_HEAD(net-ipv4.fib_table_hash[i]);
 
-   return fib4_rules_init(net);
+   err = fib4_rules_init(net);
+   if (err  0)
+   goto fail;
+   return 0;
+
+fail:
+   kfree(net-ipv4.fib_table_hash);
+   return err;
 }
 
 static void __net_exit ip_fib_net_exit(struct net *net)
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/6] [IPV4]: Small style cleanup of the error path in rtm_to_ifaddr.

2008-01-31 Thread Denis V. Lunev
Remove error code assignment inside brackets on failure. The code looks better
if the error is assigned before condition check. Also, the compiler treats this
better.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/devinet.c |   21 -
 1 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 21f71bf..9da4c68 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -492,39 +492,34 @@ static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr 
*nlh)
struct ifaddrmsg *ifm;
struct net_device *dev;
struct in_device *in_dev;
-   int err = -EINVAL;
+   int err;
 
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
if (err  0)
goto errout;
 
ifm = nlmsg_data(nlh);
-   if (ifm-ifa_prefixlen  32 || tb[IFA_LOCAL] == NULL) {
-   err = -EINVAL;
+   err = -EINVAL;
+   if (ifm-ifa_prefixlen  32 || tb[IFA_LOCAL] == NULL)
goto errout;
-   }
 
dev = __dev_get_by_index(init_net, ifm-ifa_index);
-   if (dev == NULL) {
-   err = -ENODEV;
+   err = -ENODEV;
+   if (dev == NULL)
goto errout;
-   }
 
in_dev = __in_dev_get_rtnl(dev);
-   if (in_dev == NULL) {
-   err = -ENOBUFS;
+   err = -ENOBUFS;
+   if (in_dev == NULL)
goto errout;
-   }
 
ifa = inet_alloc_ifa();
-   if (ifa == NULL) {
+   if (ifa == NULL)
/*
 * A potential indev allocation can be left alive, it stays
 * assigned to its device and is destroy with it.
 */
-   err = -ENOBUFS;
goto errout;
-   }
 
ipv4_devconf_setall(in_dev);
in_dev_hold(in_dev);
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/6] [NETNS]: Add a namespace mark to fib_info.

2008-01-31 Thread Denis V. Lunev
This is required to make fib_info lookups namespace aware. Otherwise,
initial namespace devices are marked as dead in the local routing table
when another namespace is stopped.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 include/net/ip_fib.h |1 +
 net/ipv4/fib_semantics.c |8 
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 1b2f008..cb0df37 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -69,6 +69,7 @@ struct fib_nh {
 struct fib_info {
struct hlist_node   fib_hash;
struct hlist_node   fib_lhash;
+   struct net  *fib_net;
int fib_treeref;
atomic_tfib_clntref;
int fib_dead;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 5beff2e..97cc494 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -687,6 +687,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
struct fib_info *fi = NULL;
struct fib_info *ofi;
int nhs = 1;
+   struct net *net = cfg-fc_nlinfo.nl_net;
 
/* Fast check to catch the most weird cases */
if (fib_props[cfg-fc_type].scope  cfg-fc_scope)
@@ -727,6 +728,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
goto failure;
fib_info_cnt++;
 
+   fi-fib_net = net;
fi-fib_protocol = cfg-fc_protocol;
fi-fib_flags = cfg-fc_flags;
fi-fib_priority = cfg-fc_priority;
@@ -798,8 +800,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
if (nhs != 1 || nh-nh_gw)
goto err_inval;
nh-nh_scope = RT_SCOPE_NOWHERE;
-   nh-nh_dev = dev_get_by_index(cfg-fc_nlinfo.nl_net,
- fi-fib_nh-nh_oif);
+   nh-nh_dev = dev_get_by_index(net, fi-fib_nh-nh_oif);
err = -ENODEV;
if (nh-nh_dev == NULL)
goto failure;
@@ -813,8 +814,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
if (fi-fib_prefsrc) {
if (cfg-fc_type != RTN_LOCAL || !cfg-fc_dst ||
fi-fib_prefsrc != cfg-fc_dst)
-   if (inet_addr_type(cfg-fc_nlinfo.nl_net,
-  fi-fib_prefsrc) != RTN_LOCAL)
+   if (inet_addr_type(net, fi-fib_prefsrc) != RTN_LOCAL)
goto err_inval;
}
 
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/6] [NETNS]: Process interface address manipulation routines in the namespace.

2008-01-31 Thread Denis V. Lunev
The namespace is available when required except rtm_to_ifaddr. Add
namespace argument to it.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/devinet.c |   14 --
 1 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index e55c85e..6a6e92e 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -485,7 +485,7 @@ errout:
return err;
 }
 
-static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
+static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
 {
struct nlattr *tb[IFA_MAX+1];
struct in_ifaddr *ifa;
@@ -503,7 +503,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
if (ifm-ifa_prefixlen  32 || tb[IFA_LOCAL] == NULL)
goto errout;
 
-   dev = __dev_get_by_index(init_net, ifm-ifa_index);
+   dev = __dev_get_by_index(net, ifm-ifa_index);
err = -ENODEV;
if (dev == NULL)
goto errout;
@@ -571,7 +571,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct 
nlmsghdr *nlh, void *arg
if (net != init_net)
return -EINVAL;
 
-   ifa = rtm_to_ifaddr(nlh);
+   ifa = rtm_to_ifaddr(net, nlh);
if (IS_ERR(ifa))
return PTR_ERR(ifa);
 
@@ -1189,7 +1189,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct 
netlink_callback *cb)
 
s_ip_idx = ip_idx = cb-args[1];
idx = 0;
-   for_each_netdev(init_net, dev) {
+   for_each_netdev(net, dev) {
if (idx  s_idx)
goto cont;
if (idx  s_idx)
@@ -1223,7 +1223,9 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, 
struct nlmsghdr *nlh,
struct sk_buff *skb;
u32 seq = nlh ? nlh-nlmsg_seq : 0;
int err = -ENOBUFS;
+   struct net *net;
 
+   net = ifa-ifa_dev-dev-nd_net;
skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
if (skb == NULL)
goto errout;
@@ -1235,10 +1237,10 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, 
struct nlmsghdr *nlh,
kfree_skb(skb);
goto errout;
}
-   err = rtnl_notify(skb, init_net, pid, RTNLGRP_IPV4_IFADDR, nlh, 
GFP_KERNEL);
+   err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
 errout:
if (err  0)
-   rtnl_set_sk_err(init_net, RTNLGRP_IPV4_IFADDR, err);
+   rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
 }
 
 #ifdef CONFIG_SYSCTL
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/6] [IPV4]: fib_sync_down rework.

2008-01-31 Thread Denis V. Lunev
fib_sync_down can be called with an address and with a device. In reality
it is called either with address OR with a device. The codepath inside is
completely different, so let's separate it into two calls for these two
cases.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 include/net/ip_fib.h |3 +-
 net/ipv4/fib_frontend.c  |4 +-
 net/ipv4/fib_semantics.c |  104 +++--
 3 files changed, 57 insertions(+), 54 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 9daa60b..1b2f008 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -218,7 +218,8 @@ extern void fib_select_default(struct net *net, const 
struct flowi *flp,
 
 /* Exported by fib_semantics.c */
 extern int ip_fib_check_default(__be32 gw, struct net_device *dev);
-extern int fib_sync_down(__be32 local, struct net_device *dev, int force);
+extern int fib_sync_down_dev(struct net_device *dev, int force);
+extern int fib_sync_down_addr(__be32 local);
 extern int fib_sync_up(struct net_device *dev);
 extern __be32  __fib_res_prefsrc(struct fib_result *res);
 extern void fib_select_multipath(const struct flowi *flp, struct fib_result 
*res);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index d0507f4..d69ffa2 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -808,7 +808,7 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
   First of all, we scan fib_info list searching
   for stray nexthop entries, then ignite fib_flush.
*/
-   if (fib_sync_down(ifa-ifa_local, NULL, 0))
+   if (fib_sync_down_addr(ifa-ifa_local))
fib_flush(dev-nd_net);
}
}
@@ -898,7 +898,7 @@ static void nl_fib_lookup_exit(struct net *net)
 
 static void fib_disable_ip(struct net_device *dev, int force)
 {
-   if (fib_sync_down(0, dev, force))
+   if (fib_sync_down_dev(dev, force))
fib_flush(dev-nd_net);
rt_cache_flush(0);
arp_ifdown(dev);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c791286..5beff2e 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1031,70 +1031,72 @@ nla_put_failure:
  referring to it.
- device went down - we must shutdown all nexthops going via it.
  */
-
-int fib_sync_down(__be32 local, struct net_device *dev, int force)
+int fib_sync_down_addr(__be32 local)
 {
int ret = 0;
-   int scope = RT_SCOPE_NOWHERE;
-
-   if (force)
-   scope = -1;
+   unsigned int hash = fib_laddr_hashfn(local);
+   struct hlist_head *head = fib_info_laddrhash[hash];
+   struct hlist_node *node;
+   struct fib_info *fi;
 
-   if (local  fib_info_laddrhash) {
-   unsigned int hash = fib_laddr_hashfn(local);
-   struct hlist_head *head = fib_info_laddrhash[hash];
-   struct hlist_node *node;
-   struct fib_info *fi;
+   if (fib_info_laddrhash == NULL || local == 0)
+   return 0;
 
-   hlist_for_each_entry(fi, node, head, fib_lhash) {
-   if (fi-fib_prefsrc == local) {
-   fi-fib_flags |= RTNH_F_DEAD;
-   ret++;
-   }
+   hlist_for_each_entry(fi, node, head, fib_lhash) {
+   if (fi-fib_prefsrc == local) {
+   fi-fib_flags |= RTNH_F_DEAD;
+   ret++;
}
}
+   return ret;
+}
 
-   if (dev) {
-   struct fib_info *prev_fi = NULL;
-   unsigned int hash = fib_devindex_hashfn(dev-ifindex);
-   struct hlist_head *head = fib_info_devhash[hash];
-   struct hlist_node *node;
-   struct fib_nh *nh;
+int fib_sync_down_dev(struct net_device *dev, int force)
+{
+   int ret = 0;
+   int scope = RT_SCOPE_NOWHERE;
+   struct fib_info *prev_fi = NULL;
+   unsigned int hash = fib_devindex_hashfn(dev-ifindex);
+   struct hlist_head *head = fib_info_devhash[hash];
+   struct hlist_node *node;
+   struct fib_nh *nh;
 
-   hlist_for_each_entry(nh, node, head, nh_hash) {
-   struct fib_info *fi = nh-nh_parent;
-   int dead;
+   if (force)
+   scope = -1;
 
-   BUG_ON(!fi-fib_nhs);
-   if (nh-nh_dev != dev || fi == prev_fi)
-   continue;
-   prev_fi = fi;
-   dead = 0;
-   change_nexthops(fi) {
-   if (nh-nh_flagsRTNH_F_DEAD)
-   dead++;
-   else if (nh-nh_dev == dev 
-nh-nh_scope != scope

[PATCH 6/6] [NETNS]: Lookup in FIB semantic hashes taking into account the namespace.

2008-01-31 Thread Denis V. Lunev
The namespace is not available in the fib_sync_down_addr, add it
as a parameter.

Looking up a device by the pointer to it is OK. Looking up using a result
from fib_trie/fib_hash table lookup is also safe. No need to fix that at all.
So, just fix lookup by address and insertion to the hash table path.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 include/net/ip_fib.h |2 +-
 net/ipv4/fib_frontend.c  |2 +-
 net/ipv4/fib_semantics.c |6 +-
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index cb0df37..90d1175 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -220,7 +220,7 @@ extern void fib_select_default(struct net *net, const 
struct flowi *flp,
 /* Exported by fib_semantics.c */
 extern int ip_fib_check_default(__be32 gw, struct net_device *dev);
 extern int fib_sync_down_dev(struct net_device *dev, int force);
-extern int fib_sync_down_addr(__be32 local);
+extern int fib_sync_down_addr(struct net *net, __be32 local);
 extern int fib_sync_up(struct net_device *dev);
 extern __be32  __fib_res_prefsrc(struct fib_result *res);
 extern void fib_select_multipath(const struct flowi *flp, struct fib_result 
*res);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index d69ffa2..86ff271 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -808,7 +808,7 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
   First of all, we scan fib_info list searching
   for stray nexthop entries, then ignite fib_flush.
*/
-   if (fib_sync_down_addr(ifa-ifa_local))
+   if (fib_sync_down_addr(dev-nd_net, ifa-ifa_local))
fib_flush(dev-nd_net);
}
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 97cc494..a13c847 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -229,6 +229,8 @@ static struct fib_info *fib_find_info(const struct fib_info 
*nfi)
head = fib_info_hash[hash];
 
hlist_for_each_entry(fi, node, head, fib_hash) {
+   if (fi-fib_net != nfi-fib_net)
+   continue;
if (fi-fib_nhs != nfi-fib_nhs)
continue;
if (nfi-fib_protocol == fi-fib_protocol 
@@ -1031,7 +1033,7 @@ nla_put_failure:
  referring to it.
- device went down - we must shutdown all nexthops going via it.
  */
-int fib_sync_down_addr(__be32 local)
+int fib_sync_down_addr(struct net *net, __be32 local)
 {
int ret = 0;
unsigned int hash = fib_laddr_hashfn(local);
@@ -1043,6 +1045,8 @@ int fib_sync_down_addr(__be32 local)
return 0;
 
hlist_for_each_entry(fi, node, head, fib_lhash) {
+   if (fi-fib_net != net)
+   continue;
if (fi-fib_prefsrc == local) {
fi-fib_flags |= RTNH_F_DEAD;
ret++;
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Debugfs compile fix.

2008-01-29 Thread Denis V. Lunev
Debugfs does not compile without CONFIG_SYSFS in the net-2.6 tree. Move
kobject_create_and_add under the appropriate ifdef. The fix looks correct
at first glance, but maybe the dependency should be added to the Kconfig.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 fs/debugfs/inode.c |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index d26e282..61cc937 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -432,9 +432,11 @@ static int __init debugfs_init(void)
 {
int retval;
 
+#ifdef CONFIG_SYSFS
debug_kobj = kobject_create_and_add(debug, kernel_kobj);
if (!debug_kobj)
return -EINVAL;
+#endif
 
retval = register_filesystem(debug_fs_type);
if (retval)
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 7/7 net-2.6.25] [NETNS]: Lookup in FIB semantic hashes taking into account the namespace.

2008-01-25 Thread Denis V. Lunev
The namespace is not available in the fib_sync_down_addr, add it
as a parameter.

Looking up a device by the pointer to it is OK. Looking up using a result
from fib_trie/fib_hash table lookup is also safe. No need to fix that at all.
So, just fix lookup by address and insertion to the hash table path.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 include/net/ip_fib.h |2 +-
 net/ipv4/fib_frontend.c  |2 +-
 net/ipv4/fib_semantics.c |6 +-
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index cb0df37..90d1175 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -220,7 +220,7 @@ extern void fib_select_default(struct net *net, const 
struct flowi *flp,
 /* Exported by fib_semantics.c */
 extern int ip_fib_check_default(__be32 gw, struct net_device *dev);
 extern int fib_sync_down_dev(struct net_device *dev, int force);
-extern int fib_sync_down_addr(__be32 local);
+extern int fib_sync_down_addr(struct net *net, __be32 local);
 extern int fib_sync_up(struct net_device *dev);
 extern __be32  __fib_res_prefsrc(struct fib_result *res);
 extern void fib_select_multipath(const struct flowi *flp, struct fib_result 
*res);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index d69ffa2..86ff271 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -808,7 +808,7 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
   First of all, we scan fib_info list searching
   for stray nexthop entries, then ignite fib_flush.
*/
-   if (fib_sync_down_addr(ifa-ifa_local))
+   if (fib_sync_down_addr(dev-nd_net, ifa-ifa_local))
fib_flush(dev-nd_net);
}
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 97cc494..a13c847 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -229,6 +229,8 @@ static struct fib_info *fib_find_info(const struct fib_info 
*nfi)
head = fib_info_hash[hash];
 
hlist_for_each_entry(fi, node, head, fib_hash) {
+   if (fi-fib_net != nfi-fib_net)
+   continue;
if (fi-fib_nhs != nfi-fib_nhs)
continue;
if (nfi-fib_protocol == fi-fib_protocol 
@@ -1031,7 +1033,7 @@ nla_put_failure:
  referring to it.
- device went down - we must shutdown all nexthops going via it.
  */
-int fib_sync_down_addr(__be32 local)
+int fib_sync_down_addr(struct net *net, __be32 local)
 {
int ret = 0;
unsigned int hash = fib_laddr_hashfn(local);
@@ -1043,6 +1045,8 @@ int fib_sync_down_addr(__be32 local)
return 0;
 
hlist_for_each_entry(fi, node, head, fib_lhash) {
+   if (fi-fib_net != net)
+   continue;
if (fi-fib_prefsrc == local) {
fi-fib_flags |= RTNH_F_DEAD;
ret++;
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/7 net-2.6.25] [IPV4]: Fix memory leak on error path during FIB initialization.

2008-01-25 Thread Denis V. Lunev
net->ipv4.fib_table_hash is not freed when fib4_rules_init fails. The problem
was introduced by the following commit:
commit c8050bf6d84785a7edd2e81591e8f833231477e8
Author: Denis V. Lunev [EMAIL PROTECTED]
Date:   Thu Jan 10 03:28:24 2008 -0800

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/fib_frontend.c |   10 +-
 1 files changed, 9 insertions(+), 1 deletions(-)

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index d282618..d0507f4 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -975,6 +975,7 @@ static struct notifier_block fib_netdev_notifier = {
 
 static int __net_init ip_fib_net_init(struct net *net)
 {
+   int err;
unsigned int i;
 
net-ipv4.fib_table_hash = kzalloc(
@@ -985,7 +986,14 @@ static int __net_init ip_fib_net_init(struct net *net)
for (i = 0; i  FIB_TABLE_HASHSZ; i++)
INIT_HLIST_HEAD(net-ipv4.fib_table_hash[i]);
 
-   return fib4_rules_init(net);
+   err = fib4_rules_init(net);
+   if (err  0)
+   goto fail;
+   return 0;
+
+fail:
+   kfree(net-ipv4.fib_table_hash);
+   return err;
 }
 
 static void __net_exit ip_fib_net_exit(struct net *net)
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/7 net-2.6.25] [IPV4]: Small style cleanup of the error path in rtm_to_ifaddr.

2008-01-25 Thread Denis V. Lunev
Remove the error code assignments inside the braces on the failure paths. The
code looks better if the error is assigned before the condition check. Also,
the compiler handles this better.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/devinet.c |   21 -
 1 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 21f71bf..9da4c68 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -492,39 +492,34 @@ static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr 
*nlh)
struct ifaddrmsg *ifm;
struct net_device *dev;
struct in_device *in_dev;
-   int err = -EINVAL;
+   int err;
 
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
if (err  0)
goto errout;
 
ifm = nlmsg_data(nlh);
-   if (ifm-ifa_prefixlen  32 || tb[IFA_LOCAL] == NULL) {
-   err = -EINVAL;
+   err = -EINVAL;
+   if (ifm-ifa_prefixlen  32 || tb[IFA_LOCAL] == NULL)
goto errout;
-   }
 
dev = __dev_get_by_index(init_net, ifm-ifa_index);
-   if (dev == NULL) {
-   err = -ENODEV;
+   err = -ENODEV;
+   if (dev == NULL)
goto errout;
-   }
 
in_dev = __in_dev_get_rtnl(dev);
-   if (in_dev == NULL) {
-   err = -ENOBUFS;
+   err = -ENOBUFS;
+   if (in_dev == NULL)
goto errout;
-   }
 
ifa = inet_alloc_ifa();
-   if (ifa == NULL) {
+   if (ifa == NULL)
/*
 * A potential indev allocation can be left alive, it stays
 * assigned to its device and is destroy with it.
 */
-   err = -ENOBUFS;
goto errout;
-   }
 
ipv4_devconf_setall(in_dev);
in_dev_hold(in_dev);
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 6/7 net-2.6.25] [NETNS]: Add a namespace mark to fib_info.

2008-01-25 Thread Denis V. Lunev
This is required to make fib_info lookups namespace aware. Otherwise, devices
in the initial namespace are marked as dead in the local routing table when
another namespace is stopped.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 include/net/ip_fib.h |1 +
 net/ipv4/fib_semantics.c |8 
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 1b2f008..cb0df37 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -69,6 +69,7 @@ struct fib_nh {
 struct fib_info {
struct hlist_node   fib_hash;
struct hlist_node   fib_lhash;
+   struct net  *fib_net;
int fib_treeref;
atomic_tfib_clntref;
int fib_dead;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 5beff2e..97cc494 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -687,6 +687,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
struct fib_info *fi = NULL;
struct fib_info *ofi;
int nhs = 1;
+   struct net *net = cfg-fc_nlinfo.nl_net;
 
/* Fast check to catch the most weird cases */
if (fib_props[cfg-fc_type].scope  cfg-fc_scope)
@@ -727,6 +728,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
goto failure;
fib_info_cnt++;
 
+   fi-fib_net = net;
fi-fib_protocol = cfg-fc_protocol;
fi-fib_flags = cfg-fc_flags;
fi-fib_priority = cfg-fc_priority;
@@ -798,8 +800,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
if (nhs != 1 || nh-nh_gw)
goto err_inval;
nh-nh_scope = RT_SCOPE_NOWHERE;
-   nh-nh_dev = dev_get_by_index(cfg-fc_nlinfo.nl_net,
- fi-fib_nh-nh_oif);
+   nh-nh_dev = dev_get_by_index(net, fi-fib_nh-nh_oif);
err = -ENODEV;
if (nh-nh_dev == NULL)
goto failure;
@@ -813,8 +814,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
if (fi-fib_prefsrc) {
if (cfg-fc_type != RTN_LOCAL || !cfg-fc_dst ||
fi-fib_prefsrc != cfg-fc_dst)
-   if (inet_addr_type(cfg-fc_nlinfo.nl_net,
-  fi-fib_prefsrc) != RTN_LOCAL)
+   if (inet_addr_type(net, fi-fib_prefsrc) != RTN_LOCAL)
goto err_inval;
}
 
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/7 net-2.6.25] [NETNS]: Process interface address manipulation routines in the namespace.

2008-01-25 Thread Denis V. Lunev
The namespace is available where required, except in rtm_to_ifaddr. Add a
namespace argument to it.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/devinet.c |   14 --
 1 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index e55c85e..6a6e92e 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -485,7 +485,7 @@ errout:
return err;
 }
 
-static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
+static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
 {
struct nlattr *tb[IFA_MAX+1];
struct in_ifaddr *ifa;
@@ -503,7 +503,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
if (ifm-ifa_prefixlen  32 || tb[IFA_LOCAL] == NULL)
goto errout;
 
-   dev = __dev_get_by_index(init_net, ifm-ifa_index);
+   dev = __dev_get_by_index(net, ifm-ifa_index);
err = -ENODEV;
if (dev == NULL)
goto errout;
@@ -571,7 +571,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct 
nlmsghdr *nlh, void *arg
if (net != init_net)
return -EINVAL;
 
-   ifa = rtm_to_ifaddr(nlh);
+   ifa = rtm_to_ifaddr(net, nlh);
if (IS_ERR(ifa))
return PTR_ERR(ifa);
 
@@ -1189,7 +1189,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct 
netlink_callback *cb)
 
s_ip_idx = ip_idx = cb-args[1];
idx = 0;
-   for_each_netdev(init_net, dev) {
+   for_each_netdev(net, dev) {
if (idx  s_idx)
goto cont;
if (idx  s_idx)
@@ -1223,7 +1223,9 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, 
struct nlmsghdr *nlh,
struct sk_buff *skb;
u32 seq = nlh ? nlh-nlmsg_seq : 0;
int err = -ENOBUFS;
+   struct net *net;
 
+   net = ifa-ifa_dev-dev-nd_net;
skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
if (skb == NULL)
goto errout;
@@ -1235,10 +1237,10 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, 
struct nlmsghdr *nlh,
kfree_skb(skb);
goto errout;
}
-   err = rtnl_notify(skb, init_net, pid, RTNLGRP_IPV4_IFADDR, nlh, 
GFP_KERNEL);
+   err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
 errout:
if (err  0)
-   rtnl_set_sk_err(init_net, RTNLGRP_IPV4_IFADDR, err);
+   rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
 }
 
 #ifdef CONFIG_SYSCTL
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/7 net-2.6.25] [IPV4]: fib_sync_down rework.

2008-01-25 Thread Denis V. Lunev
fib_sync_down can be called with an address and with a device. In reality
it is called either with an address OR with a device. The code paths inside
are completely different, so let's separate it into two calls for these two
cases.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 include/net/ip_fib.h |3 +-
 net/ipv4/fib_frontend.c  |4 +-
 net/ipv4/fib_semantics.c |  104 +++--
 3 files changed, 57 insertions(+), 54 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 9daa60b..1b2f008 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -218,7 +218,8 @@ extern void fib_select_default(struct net *net, const 
struct flowi *flp,
 
 /* Exported by fib_semantics.c */
 extern int ip_fib_check_default(__be32 gw, struct net_device *dev);
-extern int fib_sync_down(__be32 local, struct net_device *dev, int force);
+extern int fib_sync_down_dev(struct net_device *dev, int force);
+extern int fib_sync_down_addr(__be32 local);
 extern int fib_sync_up(struct net_device *dev);
 extern __be32  __fib_res_prefsrc(struct fib_result *res);
 extern void fib_select_multipath(const struct flowi *flp, struct fib_result 
*res);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index d0507f4..d69ffa2 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -808,7 +808,7 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
   First of all, we scan fib_info list searching
   for stray nexthop entries, then ignite fib_flush.
*/
-   if (fib_sync_down(ifa-ifa_local, NULL, 0))
+   if (fib_sync_down_addr(ifa-ifa_local))
fib_flush(dev-nd_net);
}
}
@@ -898,7 +898,7 @@ static void nl_fib_lookup_exit(struct net *net)
 
 static void fib_disable_ip(struct net_device *dev, int force)
 {
-   if (fib_sync_down(0, dev, force))
+   if (fib_sync_down_dev(dev, force))
fib_flush(dev-nd_net);
rt_cache_flush(0);
arp_ifdown(dev);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c791286..5beff2e 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1031,70 +1031,72 @@ nla_put_failure:
  referring to it.
- device went down - we must shutdown all nexthops going via it.
  */
-
-int fib_sync_down(__be32 local, struct net_device *dev, int force)
+int fib_sync_down_addr(__be32 local)
 {
int ret = 0;
-   int scope = RT_SCOPE_NOWHERE;
-
-   if (force)
-   scope = -1;
+   unsigned int hash = fib_laddr_hashfn(local);
+   struct hlist_head *head = fib_info_laddrhash[hash];
+   struct hlist_node *node;
+   struct fib_info *fi;
 
-   if (local  fib_info_laddrhash) {
-   unsigned int hash = fib_laddr_hashfn(local);
-   struct hlist_head *head = fib_info_laddrhash[hash];
-   struct hlist_node *node;
-   struct fib_info *fi;
+   if (fib_info_laddrhash == NULL || local == 0)
+   return 0;
 
-   hlist_for_each_entry(fi, node, head, fib_lhash) {
-   if (fi-fib_prefsrc == local) {
-   fi-fib_flags |= RTNH_F_DEAD;
-   ret++;
-   }
+   hlist_for_each_entry(fi, node, head, fib_lhash) {
+   if (fi-fib_prefsrc == local) {
+   fi-fib_flags |= RTNH_F_DEAD;
+   ret++;
}
}
+   return ret;
+}
 
-   if (dev) {
-   struct fib_info *prev_fi = NULL;
-   unsigned int hash = fib_devindex_hashfn(dev-ifindex);
-   struct hlist_head *head = fib_info_devhash[hash];
-   struct hlist_node *node;
-   struct fib_nh *nh;
+int fib_sync_down_dev(struct net_device *dev, int force)
+{
+   int ret = 0;
+   int scope = RT_SCOPE_NOWHERE;
+   struct fib_info *prev_fi = NULL;
+   unsigned int hash = fib_devindex_hashfn(dev-ifindex);
+   struct hlist_head *head = fib_info_devhash[hash];
+   struct hlist_node *node;
+   struct fib_nh *nh;
 
-   hlist_for_each_entry(nh, node, head, nh_hash) {
-   struct fib_info *fi = nh-nh_parent;
-   int dead;
+   if (force)
+   scope = -1;
 
-   BUG_ON(!fi-fib_nhs);
-   if (nh-nh_dev != dev || fi == prev_fi)
-   continue;
-   prev_fi = fi;
-   dead = 0;
-   change_nexthops(fi) {
-   if (nh-nh_flagsRTNH_F_DEAD)
-   dead++;
-   else if (nh-nh_dev == dev 
-nh-nh_scope != scope

[PATCH 3/7 net-2.6.25] [IPV4]: Prohibit assignment of 0.0.0.0 as interface address.

2008-01-25 Thread Denis V. Lunev
I can hardly imagine why somebody would need to assign 0.0.0.0 as an interface
address or interface destination address. The kernel will behave in a strange
way in several places if this is possible, as ifa_local != 0 is used to tell
whether the ifa is initialized or not.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/devinet.c |   12 
 1 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 9da4c68..e55c85e 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -534,7 +534,13 @@ static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr 
*nlh)
ifa-ifa_dev = in_dev;
 
ifa-ifa_local = nla_get_be32(tb[IFA_LOCAL]);
+   err = -EINVAL;
+   if (ifa-ifa_local == htonl(INADDR_ANY))
+   goto fail_free;
+
ifa-ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
+   if (ifa-ifa_address == htonl(INADDR_ANY))
+   goto fail_free;
 
if (tb[IFA_BROADCAST])
ifa-ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
@@ -549,6 +555,8 @@ static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
 
return ifa;
 
+fail_free:
+   inet_free_ifa(ifa);
 errout:
return ERR_PTR(err);
 }
@@ -736,6 +744,8 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
ret = -EINVAL;
if (inet_abc_len(sin-sin_addr.s_addr)  0)
break;
+   if (sin-sin_addr.s_addr == INADDR_ANY)
+   break;
 
if (!ifa) {
ret = -ENOBUFS;
@@ -786,6 +796,8 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
ret = -EINVAL;
if (inet_abc_len(sin-sin_addr.s_addr)  0)
break;
+   if (sin-sin_addr.s_addr == INADDR_ANY)
+   break;
ret = 0;
inet_del_ifa(in_dev, ifap, 0);
ifa-ifa_address = sin-sin_addr.s_addr;
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/7 net-2.6.25] [IPV4]: Prohibit assignment of 0.0.0.0 as interface address.

2008-01-25 Thread Denis V. Lunev
Daniel Lezcano wrote:
 Denis V. Lunev wrote:
 I can hardly imagine why somebody would need to assign 0.0.0.0 as an
 interface
 address or interface destination address. The kernel will behave in a
 strange
 way in several places if this is possible, as ifa_local != 0 is
 used to tell
 whether the ifa is initialized or not.
 
 AFAICS, we should be able to set an interface address to 0.0.0.0, in
 order to remove an IP address from an interface and keep the interface up.
 I see two trivial cases:
  * remove the ipv4 on an interface but continue to use it through ipv6
  * move ipv4 address from the interface to an attached bridge

For this case there is an IOCTL/netlink remove IP address.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/7 net-2.6.25] [IPV4]: Prohibit assignment of 0.0.0.0 as interface address.

2008-01-25 Thread Denis V. Lunev
Daniel Lezcano wrote:
 Denis V. Lunev wrote:
 Daniel Lezcano wrote:
 Denis V. Lunev wrote:
 I can hardly imagine why somebody would need to assign 0.0.0.0 as an
 interface
 address or interface destination address. The kernel will behave in a
 strange
 way in several places if this is possible, as ifa_local != 0 is
 used to tell
 whether the ifa is initialized or not.
 AFAICS, we should be able to set an interface address to 0.0.0.0, in
 order to remove an IP address from an interface and keep the interface up.
 I see two trivial cases:
  * remove the ipv4 on an interface but continue to use it through ipv6
  * move ipv4 address from the interface to an attached bridge

 For this case there is an IOCTL/netlink remove IP address.
 
 And I forgot to mention the general broadcast.
 This is needed for the DHCP protocol. If you are not able to set your
 interface to 0.0.0.0, you will not be able to send a 255.255.255.255
 broadcast message to obtain your IP address.
 

OK. Dave, pls disregard this patch. I suspect that others in the set
should not intersect with this one.

To summarize the discussion:
there is only one reason for this assignment: the old IOCTL interface does
not have any other way to remove an IP address, though netlink has a
method for it; that's why I was a little bit confused :)

This is handled in __inet_insert_ifa: the ifa is just removed there and,
correctly, an ifa with the 0.0.0.0 address can't exist in the kernel.

Sorry :)
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/12 net-2.6.25] [NETNS]: Routing namespacing on IP output path.

2008-01-22 Thread Denis V. Lunev
This set introduces namespacing in the IP output path. The namespace is
added to all routing API functions except ones with a valid socket. This
is very intrusive.

The routing cache is virtualized as a part of this effort, though the hash
function is not tuned to use the namespace id. This is not required to work
in the initial namespace.

ICMP replies now also use correct namespace.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]


--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 8/12 net-2.6.25] [NETNS]: Add namespace parameter to ip_route_output_flow.

2008-01-22 Thread Denis V. Lunev
Needed to propagate it down to the __ip_route_output_key.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 drivers/infiniband/hw/cxgb3/iwch_cm.c |2 +-
 include/net/route.h   |6 +++---
 net/dccp/ipv4.c   |2 +-
 net/ipv4/af_inet.c|2 +-
 net/ipv4/inet_connection_sock.c   |2 +-
 net/ipv4/ip_output.c  |2 +-
 net/ipv4/raw.c|2 +-
 net/ipv4/route.c  |7 ---
 net/ipv4/udp.c|2 +-
 9 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c 
b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 20ba372..ff3dee4 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -332,7 +332,7 @@ static struct rtable *find_route(struct t3cdev *dev, __be32 
local_ip,
  }
};
 
-   if (ip_route_output_flow(rt, fl, NULL, 0))
+   if (ip_route_output_flow(init_net, rt, fl, NULL, 0))
return NULL;
return rt;
 }
diff --git a/include/net/route.h b/include/net/route.h
index 3e3b14e..6b970d7 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -112,7 +112,7 @@ extern void ip_rt_redirect(__be32 old_gw, __be32 
dst, __be32 new_gw,
 extern voidrt_cache_flush(int how);
 extern int __ip_route_output_key(struct net *, struct rtable **, 
const struct flowi *flp);
 extern int ip_route_output_key(struct rtable **, struct flowi 
*flp);
-extern int ip_route_output_flow(struct rtable **rp, struct flowi 
*flp, struct sock *sk, int flags);
+extern int ip_route_output_flow(struct net *, struct rtable **rp, 
struct flowi *flp, struct sock *sk, int flags);
 extern int ip_route_input(struct sk_buff*, __be32 dst, __be32 src, 
u8 tos, struct net_device *devin);
 extern unsigned short  ip_rt_frag_needed(struct iphdr *iph, unsigned short 
new_mtu);
 extern voidip_rt_send_redirect(struct sk_buff *skb);
@@ -167,7 +167,7 @@ static inline int ip_route_connect(struct rtable **rp, 
__be32 dst,
*rp = NULL;
}
security_sk_classify_flow(sk, fl);
-   return ip_route_output_flow(rp, fl, sk, flags);
+   return ip_route_output_flow(init_net, rp, fl, sk, flags);
 }
 
 static inline int ip_route_newports(struct rtable **rp, u8 protocol,
@@ -184,7 +184,7 @@ static inline int ip_route_newports(struct rtable **rp, u8 
protocol,
ip_rt_put(*rp);
*rp = NULL;
security_sk_classify_flow(sk, fl);
-   return ip_route_output_flow(rp, fl, sk, 0);
+   return ip_route_output_flow(init_net, rp, fl, sk, 0);
}
return 0;
 }
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index f450df2..9e38b0d 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -469,7 +469,7 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk,
  };
 
security_skb_classify_flow(skb, fl);
-   if (ip_route_output_flow(rt, fl, sk, 0)) {
+   if (ip_route_output_flow(init_net, rt, fl, sk, 0)) {
IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index bcf8c8a..09ca529 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1113,7 +1113,7 @@ int inet_sk_rebuild_header(struct sock *sk)
};
 
security_sk_classify_flow(sk, fl);
-   err = ip_route_output_flow(rt, fl, sk, 0);
+   err = ip_route_output_flow(init_net, rt, fl, sk, 0);
 }
if (!err)
sk_setup_caps(sk, rt-u.dst);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 1c2a32f..7801cce 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -333,7 +333,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk,
 .dport = ireq-rmt_port } } };
 
security_req_classify_flow(req, fl);
-   if (ip_route_output_flow(rt, fl, sk, 0)) {
+   if (ip_route_output_flow(init_net, rt, fl, sk, 0)) {
IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index e57de0f..dc56e40 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -350,7 +350,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
 * itself out.
 */
security_sk_classify_flow(sk, fl);
-   if (ip_route_output_flow(rt, fl, sk, 0))
+   if (ip_route_output_flow(init_net, rt, fl, sk, 0))
goto no_route;
}
sk_setup_caps(sk, rt-u.dst);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 91a5218..85c0869

[PATCH 3/12 net-2.6.25] [NETNS]: Add netns parameter to fib_select_default.

2008-01-22 Thread Denis V. Lunev
Currently fib_select_default calls fib_get_table() with init_net. Prepare
it to be given the correct namespace in which to look up the default route.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 include/net/ip_fib.h|3 ++-
 net/ipv4/fib_frontend.c |5 +++--
 net/ipv4/route.c|2 +-
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 39f944a..9daa60b 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -213,7 +213,8 @@ extern const struct nla_policy rtm_ipv4_policy[];
 extern voidip_fib_init(void);
 extern int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
   struct net_device *dev, __be32 *spec_dst, u32 
*itag);
-extern void fib_select_default(const struct flowi *flp, struct fib_result 
*res);
+extern void fib_select_default(struct net *net, const struct flowi *flp,
+  struct fib_result *res);
 
 /* Exported by fib_semantics.c */
 extern int ip_fib_check_default(__be32 gw, struct net_device *dev);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 13bf01d..7e3e732 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -116,7 +116,8 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
 }
 #endif /* CONFIG_IP_MULTIPLE_TABLES */
 
-void fib_select_default(const struct flowi *flp, struct fib_result *res)
+void fib_select_default(struct net *net,
+   const struct flowi *flp, struct fib_result *res)
 {
struct fib_table *tb;
int table = RT_TABLE_MAIN;
@@ -125,7 +126,7 @@ void fib_select_default(const struct flowi *flp, struct 
fib_result *res)
return;
table = res-r-table;
 #endif
-   tb = fib_get_table(init_net, table);
+   tb = fib_get_table(net, table);
if (FIB_RES_GW(*res)  FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
tb-tb_select_default(tb, flp, res);
 }
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 27e0f81..4313255 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2419,7 +2419,7 @@ static int ip_route_output_slow(struct rtable **rp, const 
struct flowi *oldflp)
else
 #endif
if (!res.prefixlen  res.type == RTN_UNICAST  !fl.oif)
-   fib_select_default(fl, res);
+   fib_select_default(init_net, fl, res);
 
if (!fl.fl4_src)
fl.fl4_src = FIB_RES_PREFSRC(res);
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 6/12 net-2.6.25] [NETNS]: Add namespace parameter to ip_route_output_slow.

2008-01-22 Thread Denis V. Lunev
This function needs a net namespace in which to look up devices, fib tables,
etc., so pass one in.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/route.c |   22 +++--
 1 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 674575b..c1f9950 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2248,7 +2248,8 @@ static inline int ip_mkroute_output(struct rtable **rp,
  * Major route resolver routine.
  */
 
-static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
+static int ip_route_output_slow(struct net *net, struct rtable **rp,
+   const struct flowi *oldflp)
 {
u32 tos = RT_FL_TOS(oldflp);
struct flowi fl = { .nl_u = { .ip4_u =
@@ -2260,7 +2261,7 @@ static int ip_route_output_slow(struct rtable **rp, const 
struct flowi *oldflp)
  RT_SCOPE_UNIVERSE),
  } },
.mark = oldflp-mark,
-   .iif = init_net.loopback_dev-ifindex,
+   .iif = net-loopback_dev-ifindex,
.oif = oldflp-oif };
struct fib_result res;
unsigned flags = 0;
@@ -2282,7 +2283,7 @@ static int ip_route_output_slow(struct rtable **rp, const 
struct flowi *oldflp)
goto out;
 
/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
-   dev_out = ip_dev_find(init_net, oldflp-fl4_src);
+   dev_out = ip_dev_find(net, oldflp-fl4_src);
if (dev_out == NULL)
goto out;
 
@@ -2322,7 +2323,7 @@ static int ip_route_output_slow(struct rtable **rp, const 
struct flowi *oldflp)
 
 
if (oldflp-oif) {
-   dev_out = dev_get_by_index(init_net, oldflp-oif);
+   dev_out = dev_get_by_index(net, oldflp-oif);
err = -ENODEV;
if (dev_out == NULL)
goto out;
@@ -2356,15 +2357,15 @@ static int ip_route_output_slow(struct rtable **rp, 
const struct flowi *oldflp)
fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);
if (dev_out)
dev_put(dev_out);
-   dev_out = init_net.loopback_dev;
+   dev_out = net-loopback_dev;
dev_hold(dev_out);
-   fl.oif = init_net.loopback_dev-ifindex;
+   fl.oif = net-loopback_dev-ifindex;
res.type = RTN_LOCAL;
flags |= RTCF_LOCAL;
goto make_route;
}
 
-   if (fib_lookup(init_net, fl, res)) {
+   if (fib_lookup(net, fl, res)) {
res.fi = NULL;
if (oldflp-oif) {
/* Apparently, routing tables are wrong. Assume,
@@ -2403,7 +2404,7 @@ static int ip_route_output_slow(struct rtable **rp, const 
struct flowi *oldflp)
fl.fl4_src = fl.fl4_dst;
if (dev_out)
dev_put(dev_out);
-   dev_out = init_net.loopback_dev;
+   dev_out = net-loopback_dev;
dev_hold(dev_out);
fl.oif = dev_out-ifindex;
if (res.fi)
@@ -2419,7 +2420,7 @@ static int ip_route_output_slow(struct rtable **rp, const 
struct flowi *oldflp)
else
 #endif
if (!res.prefixlen  res.type == RTN_UNICAST  !fl.oif)
-   fib_select_default(init_net, fl, res);
+   fib_select_default(net, fl, res);
 
if (!fl.fl4_src)
fl.fl4_src = FIB_RES_PREFSRC(res);
@@ -2469,7 +2469,7 @@ int __ip_route_output_key(struct rtable **rp, const 
struct flowi *flp)
}
rcu_read_unlock_bh();
 
-   return ip_route_output_slow(rp, flp);
+   return ip_route_output_slow(init_net, rp, flp);
 }
 
 EXPORT_SYMBOL_GPL(__ip_route_output_key);
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/12 net-2.6.25] [NETNS]: Add namespace parameter to ip_dev_find.

2008-01-22 Thread Denis V. Lunev
ip_dev_find() needs a namespace to pass to fib_get_table(), so add an
argument.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 drivers/infiniband/core/addr.c |4 ++--
 drivers/infiniband/core/cma.c  |2 +-
 include/linux/inetdevice.h |2 +-
 net/ipv4/fib_frontend.c|4 ++--
 net/ipv4/igmp.c|2 +-
 net/ipv4/ip_sockglue.c |2 +-
 net/ipv4/ipmr.c|2 +-
 net/ipv4/route.c   |6 +++---
 8 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 0802b79..963177e 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -110,7 +110,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct 
rdma_dev_addr *dev_addr)
__be32 ip = ((struct sockaddr_in *) addr)-sin_addr.s_addr;
int ret;
 
-   dev = ip_dev_find(ip);
+   dev = ip_dev_find(init_net, ip);
if (!dev)
return -EADDRNOTAVAIL;
 
@@ -261,7 +261,7 @@ static int addr_resolve_local(struct sockaddr_in *src_in,
__be32 dst_ip = dst_in-sin_addr.s_addr;
int ret;
 
-   dev = ip_dev_find(dst_ip);
+   dev = ip_dev_find(init_net, dst_ip);
if (!dev)
return -EADDRNOTAVAIL;
 
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index b37045c..ef9efb3 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1280,7 +1280,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
atomic_inc(conn_id-dev_remove);
conn_id-state = CMA_CONNECT;
 
-   dev = ip_dev_find(iw_event-local_addr.sin_addr.s_addr);
+   dev = ip_dev_find(init_net, iw_event-local_addr.sin_addr.s_addr);
if (!dev) {
ret = -EADDRNOTAVAIL;
cma_enable_remove(conn_id);
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index e74a2ee..8d9eaae 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -129,7 +129,7 @@ struct in_ifaddr
 extern int register_inetaddr_notifier(struct notifier_block *nb);
 extern int unregister_inetaddr_notifier(struct notifier_block *nb);
 
-extern struct net_device   *ip_dev_find(__be32 addr);
+extern struct net_device *ip_dev_find(struct net *net, __be32 addr);
 extern int inet_addr_onlink(struct in_device *in_dev, __be32 a, 
__be32 b);
 extern int devinet_ioctl(unsigned int cmd, void __user *);
 extern voiddevinet_init(void);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 7e3e732..d282618 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -153,7 +153,7 @@ static void fib_flush(struct net *net)
  * Find the first device with a given source address.
  */
 
-struct net_device * ip_dev_find(__be32 addr)
+struct net_device * ip_dev_find(struct net *net, __be32 addr)
 {
struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
struct fib_result res;
@@ -164,7 +164,7 @@ struct net_device * ip_dev_find(__be32 addr)
res.r = NULL;
 #endif
 
-   local_table = fib_get_table(init_net, RT_TABLE_LOCAL);
+   local_table = fib_get_table(net, RT_TABLE_LOCAL);
if (!local_table || local_table-tb_lookup(local_table, fl, res))
return NULL;
if (res.type != RTN_LOCAL)
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 928bc32..1f5314c 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1395,7 +1395,7 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn 
*imr)
return idev;
}
if (imr-imr_address.s_addr) {
-   dev = ip_dev_find(imr-imr_address.s_addr);
+   dev = ip_dev_find(init_net, imr-imr_address.s_addr);
if (!dev)
return NULL;
dev_put(dev);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 82817e5..754b0a5 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -594,7 +594,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
err = 0;
break;
}
-   dev = ip_dev_find(mreq.imr_address.s_addr);
+   dev = ip_dev_find(init_net, mreq.imr_address.s_addr);
if (dev) {
mreq.imr_ifindex = dev-ifindex;
dev_put(dev);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 4198615..2212717 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -423,7 +423,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
return -ENOBUFS;
break;
case 0:
-   dev = ip_dev_find(vifc-vifc_lcl_addr.s_addr);
+   dev = ip_dev_find(init_net, vifc-vifc_lcl_addr.s_addr);
if (!dev)
return

[PATCH 1/12 net-2.6.25] [IPV4]: Declarations cleanup in ip_fib.h.

2008-01-22 Thread Denis V. Lunev
Two small issues fixed:
- fib_select_multipath is exported from fib_semantics.c rather than from
  fib_frontend.c. So, move the declaration below appropriate comment.
- struct rt_entry declaration is not used. Drop it.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 include/net/ip_fib.h |4 +---
 1 files changed, 1 insertions(+), 3 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index a859124..be70b33 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -222,15 +222,13 @@ extern const struct nla_policy rtm_ipv4_policy[];
 extern voidip_fib_init(void);
 extern int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
   struct net_device *dev, __be32 *spec_dst, u32 
*itag);
-extern void fib_select_multipath(const struct flowi *flp, struct fib_result 
*res);
-
-struct rtentry;
 
 /* Exported by fib_semantics.c */
 extern int ip_fib_check_default(__be32 gw, struct net_device *dev);
 extern int fib_sync_down(__be32 local, struct net_device *dev, int force);
 extern int fib_sync_up(struct net_device *dev);
 extern __be32  __fib_res_prefsrc(struct fib_result *res);
+extern void fib_select_multipath(const struct flowi *flp, struct fib_result 
*res);
 
 /* Exported by fib_{hash|trie}.c */
 extern void fib_hash_init(void);
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/12 net-2.6.25] [NETNS]: Re-export init_net via EXPORT_SYMBOL.

2008-01-22 Thread Denis V. Lunev
init_net is added as a parameter to a lot of old API calls, e.g.
ip_dev_find. These calls were exported with EXPORT_SYMBOL. So, export init_net
with EXPORT_SYMBOL as well to keep the networking API consistent.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/core/net_namespace.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 8023208..26e941d 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -18,7 +18,7 @@ static DEFINE_MUTEX(net_mutex);
 LIST_HEAD(net_namespace_list);
 
 struct net init_net;
-EXPORT_SYMBOL_GPL(init_net);
+EXPORT_SYMBOL(init_net);
 
 /*
  * setup_net runs the initializers for the network namespace object.
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/12 net-2.6.25] [IPV4]: Consolidate fib_select_default.

2008-01-22 Thread Denis V. Lunev
The difference between the implementations of fib_select_default with and
without CONFIG_IP_MULTIPLE_TABLES defined looks negligible. Consolidate them
into a single function and place it in fib_frontend.c.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 include/net/ip_fib.h|   10 +-
 net/ipv4/fib_frontend.c |   14 ++
 net/ipv4/fib_rules.c|   10 --
 3 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index be70b33..39f944a 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -193,14 +193,6 @@ static inline int fib_lookup(struct net *net, const struct 
flowi *flp,
return -ENETUNREACH;
 }
 
-static inline void fib_select_default(const struct flowi *flp,
- struct fib_result *res)
-{
-   struct fib_table *table = fib_get_table(init_net, RT_TABLE_MAIN);
-   if (FIB_RES_GW(*res)  FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
-   table-tb_select_default(table, flp, res);
-}
-
 #else /* CONFIG_IP_MULTIPLE_TABLES */
 extern int __net_init fib4_rules_init(struct net *net);
 extern void __net_exit fib4_rules_exit(struct net *net);
@@ -213,7 +205,6 @@ extern int fib_lookup(struct net *n, struct flowi *flp, 
struct fib_result *res);
 
 extern struct fib_table *fib_new_table(struct net *net, u32 id);
 extern struct fib_table *fib_get_table(struct net *net, u32 id);
-extern void fib_select_default(const struct flowi *flp, struct fib_result 
*res);
 
 #endif /* CONFIG_IP_MULTIPLE_TABLES */
 
@@ -222,6 +213,7 @@ extern const struct nla_policy rtm_ipv4_policy[];
 extern voidip_fib_init(void);
 extern int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
   struct net_device *dev, __be32 *spec_dst, u32 
*itag);
+extern void fib_select_default(const struct flowi *flp, struct fib_result 
*res);
 
 /* Exported by fib_semantics.c */
 extern int ip_fib_check_default(__be32 gw, struct net_device *dev);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 6761639..13bf01d 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -116,6 +116,20 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
 }
 #endif /* CONFIG_IP_MULTIPLE_TABLES */
 
+void fib_select_default(const struct flowi *flp, struct fib_result *res)
+{
+   struct fib_table *tb;
+   int table = RT_TABLE_MAIN;
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+   if (res-r == NULL || res-r-action != FR_ACT_TO_TBL)
+   return;
+   table = res-r-table;
+#endif
+   tb = fib_get_table(init_net, table);
+   if (FIB_RES_GW(*res)  FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
+   tb-tb_select_default(tb, flp, res);
+}
+
 static void fib_flush(struct net *net)
 {
int flushed = 0;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 1effb4a..19274d0 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -102,16 +102,6 @@ errout:
 }
 
 
-void fib_select_default(const struct flowi *flp, struct fib_result *res)
-{
-   if (res-r  res-r-action == FR_ACT_TO_TBL 
-   FIB_RES_GW(*res)  FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) {
-   struct fib_table *tb;
-   if ((tb = fib_get_table(init_net, res-r-table)) != NULL)
-   tb-tb_select_default(tb, flp, res);
-   }
-}
-
 static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 {
struct fib4_rule *r = (struct fib4_rule *) rule;
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 7/12 net-2.6.25] [NETNS]: Add namespace parameter to __ip_route_output_key.

2008-01-22 Thread Denis V. Lunev
The namespace is only required here to propagate it down to ip_route_output_slow.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 include/net/route.h |4 ++--
 net/ipv4/icmp.c |4 ++--
 net/ipv4/route.c|7 ---
 net/ipv4/xfrm4_policy.c |2 +-
 4 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/include/net/route.h b/include/net/route.h
index 5847e6f..3e3b14e 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -110,7 +110,7 @@ extern int  ip_rt_init(void);
 extern voidip_rt_redirect(__be32 old_gw, __be32 dst, __be32 new_gw,
   __be32 src, struct net_device *dev);
 extern voidrt_cache_flush(int how);
-extern int __ip_route_output_key(struct rtable **, const struct 
flowi *flp);
+extern int __ip_route_output_key(struct net *, struct rtable **, 
const struct flowi *flp);
 extern int ip_route_output_key(struct rtable **, struct flowi 
*flp);
 extern int ip_route_output_flow(struct rtable **rp, struct flowi 
*flp, struct sock *sk, int flags);
 extern int ip_route_input(struct sk_buff*, __be32 dst, __be32 src, 
u8 tos, struct net_device *devin);
@@ -158,7 +158,7 @@ static inline int ip_route_connect(struct rtable **rp, 
__be32 dst,
 
int err;
if (!dst || !src) {
-   err = __ip_route_output_key(rp, fl);
+   err = __ip_route_output_key(init_net, rp, fl);
if (err)
return err;
fl.fl4_dst = (*rp)-rt_dst;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 7ed8c50..21422bf 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -569,7 +569,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, 
__be32 info)
struct rtable *rt2;
 
security_skb_classify_flow(skb_in, fl);
-   if (__ip_route_output_key(rt, fl))
+   if (__ip_route_output_key(init_net, rt, fl))
goto out_unlock;
 
/* No need to clone since we're just using its address. */
@@ -592,7 +592,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, 
__be32 info)
goto out_unlock;
 
if (inet_addr_type(init_net, fl.fl4_src) == RTN_LOCAL)
-   err = __ip_route_output_key(rt2, fl);
+   err = __ip_route_output_key(init_net, rt2, fl);
else {
struct flowi fl2 = {};
struct dst_entry *odst;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c1f9950..cb035cc 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2442,7 +2442,8 @@ make_route:
 out:   return err;
 }
 
-int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
+int __ip_route_output_key(struct net *net, struct rtable **rp,
+ const struct flowi *flp)
 {
unsigned hash;
struct rtable *rth;
@@ -2469,7 +2470,7 @@ int __ip_route_output_key(struct rtable **rp, const 
struct flowi *flp)
}
rcu_read_unlock_bh();
 
-   return ip_route_output_slow(init_net, rp, flp);
+   return ip_route_output_slow(net, rp, flp);
 }
 
 EXPORT_SYMBOL_GPL(__ip_route_output_key);
@@ -2535,7 +2536,7 @@ int ip_route_output_flow(struct rtable **rp, struct flowi 
*flp, struct sock *sk,
 {
int err;
 
-   if ((err = __ip_route_output_key(rp, flp)) != 0)
+   if ((err = __ip_route_output_key(init_net, rp, flp)) != 0)
return err;
 
if (flp-proto) {
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index f04516c..3783e3e 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -36,7 +36,7 @@ static struct dst_entry *xfrm4_dst_lookup(int tos, 
xfrm_address_t *saddr,
if (saddr)
fl.fl4_src = saddr-a4;
 
-   err = __ip_route_output_key(rt, fl);
+   err = __ip_route_output_key(init_net, rt, fl);
dst = rt-u.dst;
if (err)
dst = ERR_PTR(err);
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 9/12 net-2.6.25] [NETNS]: Add namespace parameter to ip_route_output_key.

2008-01-22 Thread Denis V. Lunev
The namespace is needed here to propagate it down to ip_route_output_flow.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 drivers/infiniband/core/addr.c   |4 ++--
 drivers/net/bonding/bond_main.c  |2 +-
 include/net/route.h  |2 +-
 net/atm/clip.c   |2 +-
 net/bridge/br_netfilter.c|2 +-
 net/ipv4/arp.c   |6 +++---
 net/ipv4/icmp.c  |4 ++--
 net/ipv4/igmp.c  |6 +++---
 net/ipv4/ip_gre.c|   10 +-
 net/ipv4/ip_output.c |2 +-
 net/ipv4/ipip.c  |8 
 net/ipv4/ipmr.c  |4 ++--
 net/ipv4/ipvs/ip_vs_xmit.c   |6 +++---
 net/ipv4/netfilter.c |6 +++---
 net/ipv4/netfilter/nf_nat_rule.c |2 +-
 net/ipv4/route.c |6 +++---
 net/ipv4/syncookies.c|2 +-
 net/ipv6/ip6_tunnel.c|4 ++--
 net/ipv6/sit.c   |4 ++--
 net/rxrpc/ar-peer.c  |2 +-
 net/sctp/protocol.c  |4 ++--
 21 files changed, 44 insertions(+), 44 deletions(-)

diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 963177e..a58ad8a 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -158,7 +158,7 @@ static void addr_send_arp(struct sockaddr_in *dst_in)
 
memset(fl, 0, sizeof fl);
fl.nl_u.ip4_u.daddr = dst_ip;
-   if (ip_route_output_key(rt, fl))
+   if (ip_route_output_key(init_net, rt, fl))
return;
 
neigh_event_send(rt-u.dst.neighbour, NULL);
@@ -179,7 +179,7 @@ static int addr_resolve_remote(struct sockaddr_in *src_in,
memset(fl, 0, sizeof fl);
fl.nl_u.ip4_u.daddr = dst_ip;
fl.nl_u.ip4_u.saddr = src_ip;
-   ret = ip_route_output_key(rt, fl);
+   ret = ip_route_output_key(init_net, rt, fl);
if (ret)
goto out;
 
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index b0b2603..7a7be20 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2513,7 +2513,7 @@ static void bond_arp_send_all(struct bonding *bond, 
struct slave *slave)
fl.fl4_dst = targets[i];
fl.fl4_tos = RTO_ONLINK;
 
-   rv = ip_route_output_key(rt, fl);
+   rv = ip_route_output_key(init_net, rt, fl);
if (rv) {
if (net_ratelimit()) {
printk(KERN_WARNING DRV_NAME
diff --git a/include/net/route.h b/include/net/route.h
index 6b970d7..d9b876a 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -111,7 +111,7 @@ extern void ip_rt_redirect(__be32 old_gw, __be32 
dst, __be32 new_gw,
   __be32 src, struct net_device *dev);
 extern voidrt_cache_flush(int how);
 extern int __ip_route_output_key(struct net *, struct rtable **, 
const struct flowi *flp);
-extern int ip_route_output_key(struct rtable **, struct flowi 
*flp);
+extern int ip_route_output_key(struct net *, struct rtable **, 
struct flowi *flp);
 extern int ip_route_output_flow(struct net *, struct rtable **rp, 
struct flowi *flp, struct sock *sk, int flags);
 extern int ip_route_input(struct sk_buff*, __be32 dst, __be32 src, 
u8 tos, struct net_device *devin);
 extern unsigned short  ip_rt_frag_needed(struct iphdr *iph, unsigned short 
new_mtu);
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 45e0862..86b885e 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -534,7 +534,7 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
unlink_clip_vcc(clip_vcc);
return 0;
}
-   error = ip_route_output_key(rt, fl);
+   error = ip_route_output_key(init_net, rt, fl);
if (error)
return error;
neigh = __neigh_lookup(clip_tbl, ip, rt-u.dst.dev, 1);
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 0e884fe..d4579cf 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -336,7 +336,7 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb)
if (err != -EHOSTUNREACH || !in_dev || 
IN_DEV_FORWARD(in_dev))
goto free_skb;
 
-   if (!ip_route_output_key(rt, fl)) {
+   if (!ip_route_output_key(init_net, rt, fl)) {
/* - Bridged-and-DNAT'ed traffic doesn't
 *   require ip_forwarding. */
if (((struct dst_entry *)rt)-dev == dev) {
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index a44ff1a..a3cfd04 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -424,7 +424,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct 
net_device *dev)
int flag = 0;
/*unsigned

Re: [PATCH 5/12 net-2.6.25] [NETNS]: Re-export init_net via EXPORT_SYMBOL.

2008-01-22 Thread Denis V. Lunev
Patrick McHardy wrote:
 Denis V. Lunev wrote:
 init_net is used added as a parameter to a lot of old API calls, f.e.
 ip_dev_find. These calls were exported as EXPORT_SYMBOL. So, export
 init_net
 as EXPORT_SYMBOL to keep networking API consistent.
 
 
 I think this should go in 2.6.24 if still possible so people
 don't have to find workarounds that will be obsolete one
 version later.
 
yep, sure :) should I send this one separately for 2.6.24?
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 10/12 net-2.6.25] [NETNS]: Correct namespace for connect-time routing.

2008-01-22 Thread Denis V. Lunev
ip_route_connect and ip_route_newports are a part of routing API presented to
the socket layer. The namespace is available inside them through a socket.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 include/net/route.h |8 +---
 1 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/include/net/route.h b/include/net/route.h
index d9b876a..1985d82 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -33,6 +33,7 @@
 #include linux/ip.h
 #include linux/cache.h
 #include linux/security.h
+#include net/sock.h
 
 #ifndef __KERNEL__
 #warning This file is not supposed to be used outside of kernel.
@@ -157,8 +158,9 @@ static inline int ip_route_connect(struct rtable **rp, 
__be32 dst,
 .dport = dport } } };
 
int err;
+   struct net *net = sk-sk_net;
if (!dst || !src) {
-   err = __ip_route_output_key(init_net, rp, fl);
+   err = __ip_route_output_key(net, rp, fl);
if (err)
return err;
fl.fl4_dst = (*rp)-rt_dst;
@@ -167,7 +169,7 @@ static inline int ip_route_connect(struct rtable **rp, 
__be32 dst,
*rp = NULL;
}
security_sk_classify_flow(sk, fl);
-   return ip_route_output_flow(init_net, rp, fl, sk, flags);
+   return ip_route_output_flow(net, rp, fl, sk, flags);
 }
 
 static inline int ip_route_newports(struct rtable **rp, u8 protocol,
@@ -184,7 +186,7 @@ static inline int ip_route_newports(struct rtable **rp, u8 
protocol,
ip_rt_put(*rp);
*rp = NULL;
security_sk_classify_flow(sk, fl);
-   return ip_route_output_flow(init_net, rp, fl, sk, 0);
+   return ip_route_output_flow(sk-sk_net, rp, fl, sk, 0);
}
return 0;
 }
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 11/12 net-2.6.25] [NETNS]: Routing cache virtualization.

2008-01-22 Thread Denis V. Lunev
Basically, this piece looks relatively easy. The namespace is already available
on the dst entry via its device, and the device is safe to dereference. Compare
it with the namespace of the searcher and skip the entry if they differ.

The only exception is ip_rt_frag_needed. So, add namespace parameter to it.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 include/net/route.h |2 +-
 net/ipv4/icmp.c |2 +-
 net/ipv4/route.c|   21 -
 3 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/include/net/route.h b/include/net/route.h
index 1985d82..4eabf00 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -115,7 +115,7 @@ extern int  __ip_route_output_key(struct net *, 
struct rtable **, const struct f
 extern int ip_route_output_key(struct net *, struct rtable **, 
struct flowi *flp);
 extern int ip_route_output_flow(struct net *, struct rtable **rp, 
struct flowi *flp, struct sock *sk, int flags);
 extern int ip_route_input(struct sk_buff*, __be32 dst, __be32 src, 
u8 tos, struct net_device *devin);
-extern unsigned short  ip_rt_frag_needed(struct iphdr *iph, unsigned short 
new_mtu);
+extern unsigned short  ip_rt_frag_needed(struct net *net, struct iphdr *iph, 
unsigned short new_mtu);
 extern voidip_rt_send_redirect(struct sk_buff *skb);
 
 extern unsignedinet_addr_type(struct net *net, __be32 addr);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index c04aac5..052b278 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -696,7 +696,7 @@ static void icmp_unreach(struct sk_buff *skb)
 and DF set.\n,
   NIPQUAD(iph-daddr));
} else {
-   info = ip_rt_frag_needed(iph,
+   info = ip_rt_frag_needed(init_net, iph,
 ntohs(icmph-un.frag.mtu));
if (!info)
goto out;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 87076c6..07dd295 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -648,6 +648,11 @@ static inline int compare_keys(struct flowi *fl1, struct 
flowi *fl2)
(fl1-iif ^ fl2-iif)) == 0;
 }
 
+static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
+{
+   return rt1-u.dst.dev-nd_net == rt2-u.dst.dev-nd_net;
+}
+
 /*
  * Perform a full scan of hash table and free all entries.
  * Can be called by a softirq or a process.
@@ -961,7 +966,7 @@ restart:
 
spin_lock_bh(rt_hash_lock_addr(hash));
while ((rth = *rthp) != NULL) {
-   if (compare_keys(rth-fl, rt-fl)) {
+   if (compare_keys(rth-fl, rt-fl)  compare_netns(rth, rt)) {
/* Put it first */
*rthp = rth-u.dst.rt_next;
/*
@@ -1415,7 +1420,8 @@ static __inline__ unsigned short guess_mtu(unsigned short 
old_mtu)
return 68;
 }
 
-unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu)
+unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
+unsigned short new_mtu)
 {
int i;
unsigned short old_mtu = ntohs(iph-tot_len);
@@ -1438,7 +1444,8 @@ unsigned short ip_rt_frag_needed(struct iphdr *iph, 
unsigned short new_mtu)
rth-rt_dst  == daddr 
rth-rt_src  == iph-saddr 
rth-fl.iif == 0 
-   !(dst_metric_locked(rth-u.dst, RTAX_MTU))) {
+   !(dst_metric_locked(rth-u.dst, RTAX_MTU)) 
+   rth-u.dst.dev-nd_net == net) {
unsigned short mtu = new_mtu;
 
if (new_mtu  68 || new_mtu = old_mtu) {
@@ -2049,7 +2056,9 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, 
__be32 saddr,
struct rtable * rth;
unsignedhash;
int iif = dev-ifindex;
+   struct net *net;
 
+   net = skb-dev-nd_net;
tos = IPTOS_RT_MASK;
hash = rt_hash(daddr, saddr, iif);
 
@@ -2061,7 +2070,8 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, 
__be32 saddr,
rth-fl.iif == iif 
rth-fl.oif == 0 
rth-fl.mark == skb-mark 
-   rth-fl.fl4_tos == tos) {
+   rth-fl.fl4_tos == tos 
+   rth-u.dst.dev-nd_net == net) {
dst_use(rth-u.dst, jiffies);
RT_CACHE_STAT_INC(in_hit);
rcu_read_unlock();
@@ -2459,7 +2469,8 @@ int __ip_route_output_key(struct net *net, struct rtable 
**rp,
rth-fl.oif == flp-oif 
rth-fl.mark == flp-mark 
!((rth-fl.fl4_tos ^ flp-fl4_tos

[PATCH 12/12 net-2.6.25] [NETNS]: Add namespace for ICMP replying code.

2008-01-22 Thread Denis V. Lunev
All the needed API is in place: the namespace is available, where required,
from the device on the dst entry of the incoming packet. So, just replace
init_net with the proper namespace.

Other protocols will follow.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/icmp.c  |   21 +
 net/ipv4/ip_output.c |2 +-
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 052b278..a6c092c 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -404,7 +404,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct 
sk_buff *skb)
.tos = RT_TOS(ip_hdr(skb)-tos) 
} },
.proto = IPPROTO_ICMP };
security_skb_classify_flow(skb, fl);
-   if (ip_route_output_key(init_net, rt, fl))
+   if (ip_route_output_key(rt-u.dst.dev-nd_net, rt, fl))
goto out_unlock;
}
if (icmpv4_xrlim_allow(rt, icmp_param-data.icmph.type,
@@ -436,9 +436,11 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, 
__be32 info)
struct ipcm_cookie ipc;
__be32 saddr;
u8  tos;
+   struct net *net;
 
if (!rt)
goto out;
+   net = rt-u.dst.dev-nd_net;
 
/*
 *  Find the original header. It is expected to be valid, of course.
@@ -514,7 +516,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, 
__be32 info)
struct net_device *dev = NULL;
 
if (rt-fl.iif  sysctl_icmp_errors_use_inbound_ifaddr)
-   dev = dev_get_by_index(init_net, rt-fl.iif);
+   dev = dev_get_by_index(net, rt-fl.iif);
 
if (dev) {
saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK);
@@ -569,7 +571,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, 
__be32 info)
struct rtable *rt2;
 
security_skb_classify_flow(skb_in, fl);
-   if (__ip_route_output_key(init_net, rt, fl))
+   if (__ip_route_output_key(net, rt, fl))
goto out_unlock;
 
/* No need to clone since we're just using its address. */
@@ -591,14 +593,14 @@ void icmp_send(struct sk_buff *skb_in, int type, int 
code, __be32 info)
if (xfrm_decode_session_reverse(skb_in, fl, AF_INET))
goto out_unlock;
 
-   if (inet_addr_type(init_net, fl.fl4_src) == RTN_LOCAL)
-   err = __ip_route_output_key(init_net, rt2, fl);
+   if (inet_addr_type(net, fl.fl4_src) == RTN_LOCAL)
+   err = __ip_route_output_key(net, rt2, fl);
else {
struct flowi fl2 = {};
struct dst_entry *odst;
 
fl2.fl4_dst = fl.fl4_src;
-   if (ip_route_output_key(init_net, rt2, fl2))
+   if (ip_route_output_key(net, rt2, fl2))
goto out_unlock;
 
/* Ugh! */
@@ -666,6 +668,9 @@ static void icmp_unreach(struct sk_buff *skb)
int hash, protocol;
struct net_protocol *ipprot;
u32 info = 0;
+   struct net *net;
+
+   net = skb-dst-dev-nd_net;
 
/*
 *  Incomplete header ?
@@ -696,7 +701,7 @@ static void icmp_unreach(struct sk_buff *skb)
 and DF set.\n,
   NIPQUAD(iph-daddr));
} else {
-   info = ip_rt_frag_needed(init_net, iph,
+   info = ip_rt_frag_needed(net, iph,
 ntohs(icmph-un.frag.mtu));
if (!info)
goto out;
@@ -734,7 +739,7 @@ static void icmp_unreach(struct sk_buff *skb)
 */
 
if (!sysctl_icmp_ignore_bogus_error_responses 
-   inet_addr_type(init_net, iph-daddr) == RTN_BROADCAST) {
+   inet_addr_type(net, iph-daddr) == RTN_BROADCAST) {
if (net_ratelimit())
printk(KERN_WARNING %u.%u.%u.%u sent an invalid ICMP 
type %u, code %u 
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6a5b839..4fad239 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1377,7 +1377,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, 
struct ip_reply_arg *ar
 .dport = tcp_hdr(skb)-source 
} },
.proto = sk-sk_protocol };
security_skb_classify_flow(skb, fl);
-   if (ip_route_output_key(init_net, rt, fl))
+   if (ip_route_output_key(sk-sk_net, rt, fl

[PATCH 0/6 net-2.6.25] Provide correct namespace on IPv4 packet input path.

2008-01-21 Thread Denis V. Lunev
This patchset sequentially adds namespace parameter to fib_lookup and
inetdev_by_index. After that it is possible to pass network namespace
from input packet to routing engine.

Output path is much more intrusive and will be sent separately.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/6 net-2.6.25] [NETNS] Pass correct namespace in fib_validate_source.

2008-01-21 Thread Denis V. Lunev
Correct network namespace is available inside fib_validate_source. It can be
obtained from the device passed in. The device is not NULL as in_device is
obtained from it just above.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/fib_frontend.c |6 --
 1 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index dcd3a28..39b8b35 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -243,6 +243,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int 
oif,
struct fib_result res;
int no_addr, rpf;
int ret;
+   struct net *net;
 
no_addr = rpf = 0;
rcu_read_lock();
@@ -256,7 +257,8 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int 
oif,
if (in_dev == NULL)
goto e_inval;
 
-   if (fib_lookup(init_net, fl, res))
+   net = dev-nd_net;
+   if (fib_lookup(net, fl, res))
goto last_resort;
if (res.type != RTN_UNICAST)
goto e_inval_res;
@@ -280,7 +282,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int 
oif,
fl.oif = dev-ifindex;
 
ret = 0;
-   if (fib_lookup(init_net, fl, res) == 0) {
+   if (fib_lookup(net, fl, res) == 0) {
if (res.type == RTN_UNICAST) {
*spec_dst = FIB_RES_PREFSRC(res);
ret = FIB_RES_NH(res).nh_scope = RT_SCOPE_HOST;
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/6 net-2.6.25] [NETNS] Pass correct namespace in ip_route_input_slow.

2008-01-21 Thread Denis V. Lunev
The packet on the input path always has a reference to the input network
device it is passed from. Extract the network namespace from it.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/route.c |7 ---
 1 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c107bc3..b3c6122 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1881,6 +1881,7 @@ static int ip_route_input_slow(struct sk_buff *skb, 
__be32 daddr, __be32 saddr,
__be32  spec_dst;
int err = -EINVAL;
int free_res = 0;
+   struct net* net = dev-nd_net;
 
/* IP on this device is disabled. */
 
@@ -1911,7 +1912,7 @@ static int ip_route_input_slow(struct sk_buff *skb, 
__be32 daddr, __be32 saddr,
/*
 *  Now we are ready to route packet.
 */
-   if ((err = fib_lookup(init_net, fl, res)) != 0) {
+   if ((err = fib_lookup(net, fl, res)) != 0) {
if (!IN_DEV_FORWARD(in_dev))
goto e_hostunreach;
goto no_route;
@@ -1926,7 +1927,7 @@ static int ip_route_input_slow(struct sk_buff *skb, 
__be32 daddr, __be32 saddr,
if (res.type == RTN_LOCAL) {
int result;
result = fib_validate_source(saddr, daddr, tos,
-init_net.loopback_dev-ifindex,
+net-loopback_dev-ifindex,
 dev, spec_dst, itag);
if (result  0)
goto martian_source;
@@ -1988,7 +1989,7 @@ local_input:
 #endif
rth-rt_iif =
rth-fl.iif = dev-ifindex;
-   rth-u.dst.dev  = init_net.loopback_dev;
+   rth-u.dst.dev  = net-loopback_dev;
dev_hold(rth-u.dst.dev);
rth-idev   = in_dev_get(rth-u.dst.dev);
rth-rt_gateway = daddr;
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/6 net-2.6.25] [NETNS] Pass correct namespace in context fib_check_nh.

2008-01-21 Thread Denis V. Lunev
Correct network namespace is already used in fib_check_nh. Re-work its usage
for better readability and pass it into fib_lookup & inetdev_by_index.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/fib_semantics.c |   12 ++--
 1 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 8b47e11..c791286 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -519,7 +519,9 @@ static int fib_check_nh(struct fib_config *cfg, struct 
fib_info *fi,
struct fib_nh *nh)
 {
int err;
+   struct net *net;
 
+   net = cfg-fc_nlinfo.nl_net;
if (nh-nh_gw) {
struct fib_result res;
 
@@ -532,11 +534,9 @@ static int fib_check_nh(struct fib_config *cfg, struct 
fib_info *fi,
 
if (cfg-fc_scope = RT_SCOPE_LINK)
return -EINVAL;
-   if (inet_addr_type(cfg-fc_nlinfo.nl_net,
-  nh-nh_gw) != RTN_UNICAST)
+   if (inet_addr_type(net, nh-nh_gw) != RTN_UNICAST)
return -EINVAL;
-   if ((dev = __dev_get_by_index(cfg-fc_nlinfo.nl_net,
- nh-nh_oif)) == NULL)
+   if ((dev = __dev_get_by_index(net, nh-nh_oif)) == NULL)
return -ENODEV;
if (!(dev-flagsIFF_UP))
return -ENETDOWN;
@@ -559,7 +559,7 @@ static int fib_check_nh(struct fib_config *cfg, struct 
fib_info *fi,
/* It is not necessary, but requires a bit of thinking 
*/
if (fl.fl4_scope  RT_SCOPE_LINK)
fl.fl4_scope = RT_SCOPE_LINK;
-   if ((err = fib_lookup(init_net, fl, res)) != 0)
+   if ((err = fib_lookup(net, fl, res)) != 0)
return err;
}
err = -EINVAL;
@@ -583,7 +583,7 @@ out:
if (nh-nh_flags(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
return -EINVAL;
 
-   in_dev = inetdev_by_index(init_net, nh-nh_oif);
+   in_dev = inetdev_by_index(net, nh-nh_oif);
if (in_dev == NULL)
return -ENODEV;
if (!(in_dev-dev-flagsIFF_UP)) {
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 6/6 net-2.6.25] [NETNS] Pass correct namespace in ip_rt_get_source.

2008-01-21 Thread Denis V. Lunev
ip_rt_get_source is the infamous place for which dst_ifdown kludges have
been implemented. This means that rt->u.dst.dev can be safely dereferenced
to obtain nd_net.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 net/ipv4/route.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b3c6122..ede0571 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1559,7 +1559,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
 
if (rt-fl.iif == 0)
src = rt-rt_src;
-   else if (fib_lookup(init_net, rt-fl, res) == 0) {
+   else if (fib_lookup(rt-u.dst.dev-nd_net, rt-fl, res) == 0) {
src = FIB_RES_PREFSRC(res);
fib_res_put(res);
} else
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/6 net-2.6.25] [NETNS] Add netns parameter to fib_lookup.

2008-01-21 Thread Denis V. Lunev
Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 include/net/ip_fib.h |9 +
 net/ipv4/fib_frontend.c  |4 ++--
 net/ipv4/fib_rules.c |4 ++--
 net/ipv4/fib_semantics.c |2 +-
 net/ipv4/route.c |6 +++---
 5 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 08ebb1e..9daa60b 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -178,15 +178,16 @@ static inline struct fib_table *fib_new_table(struct net 
*net, u32 id)
return fib_get_table(net, id);
 }
 
-static inline int fib_lookup(const struct flowi *flp, struct fib_result *res)
+static inline int fib_lookup(struct net *net, const struct flowi *flp,
+struct fib_result *res)
 {
struct fib_table *table;
 
-   table = fib_get_table(init_net, RT_TABLE_LOCAL);
+   table = fib_get_table(net, RT_TABLE_LOCAL);
if (!table-tb_lookup(table, flp, res))
return 0;
 
-   table = fib_get_table(init_net, RT_TABLE_MAIN);
+   table = fib_get_table(net, RT_TABLE_MAIN);
if (!table-tb_lookup(table, flp, res))
return 0;
return -ENETUNREACH;
@@ -200,7 +201,7 @@ extern void __net_exit fib4_rules_exit(struct net *net);
 extern u32 fib_rules_tclass(struct fib_result *res);
 #endif
 
-extern int fib_lookup(struct flowi *flp, struct fib_result *res);
+extern int fib_lookup(struct net *n, struct flowi *flp, struct fib_result 
*res);
 
 extern struct fib_table *fib_new_table(struct net *net, u32 id);
 extern struct fib_table *fib_get_table(struct net *net, u32 id);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 8c0081c..dcd3a28 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -256,7 +256,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int 
oif,
if (in_dev == NULL)
goto e_inval;
 
-   if (fib_lookup(fl, res))
+   if (fib_lookup(init_net, fl, res))
goto last_resort;
if (res.type != RTN_UNICAST)
goto e_inval_res;
@@ -280,7 +280,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int 
oif,
fl.oif = dev-ifindex;
 
ret = 0;
-   if (fib_lookup(fl, res) == 0) {
+   if (fib_lookup(init_net, fl, res) == 0) {
if (res.type == RTN_UNICAST) {
*spec_dst = FIB_RES_PREFSRC(res);
ret = FIB_RES_NH(res).nh_scope = RT_SCOPE_HOST;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 2b43002..19274d0 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -54,14 +54,14 @@ u32 fib_rules_tclass(struct fib_result *res)
 }
 #endif
 
-int fib_lookup(struct flowi *flp, struct fib_result *res)
+int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res)
 {
struct fib_lookup_arg arg = {
.result = res,
};
int err;
 
-   err = fib_rules_lookup(init_net.ipv4.rules_ops, flp, 0, arg);
+   err = fib_rules_lookup(net-ipv4.rules_ops, flp, 0, arg);
res-r = arg.rule;
 
return err;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 0e08df4..ecd91c6 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -559,7 +559,7 @@ static int fib_check_nh(struct fib_config *cfg, struct 
fib_info *fi,
/* It is not necessary, but requires a bit of thinking 
*/
if (fl.fl4_scope  RT_SCOPE_LINK)
fl.fl4_scope = RT_SCOPE_LINK;
-   if ((err = fib_lookup(fl, res)) != 0)
+   if ((err = fib_lookup(init_net, fl, res)) != 0)
return err;
}
err = -EINVAL;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 162e738..c107bc3 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1559,7 +1559,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
 
if (rt-fl.iif == 0)
src = rt-rt_src;
-   else if (fib_lookup(rt-fl, res) == 0) {
+   else if (fib_lookup(init_net, rt-fl, res) == 0) {
src = FIB_RES_PREFSRC(res);
fib_res_put(res);
} else
@@ -1911,7 +1911,7 @@ static int ip_route_input_slow(struct sk_buff *skb, 
__be32 daddr, __be32 saddr,
/*
 *  Now we are ready to route packet.
 */
-   if ((err = fib_lookup(fl, res)) != 0) {
+   if ((err = fib_lookup(init_net, fl, res)) != 0) {
if (!IN_DEV_FORWARD(in_dev))
goto e_hostunreach;
goto no_route;
@@ -2363,7 +2363,7 @@ static int ip_route_output_slow(struct rtable **rp, const 
struct flowi *oldflp)
goto make_route;
}
 
-   if (fib_lookup(fl, res)) {
+   if (fib_lookup(init_net, fl, res)) {
res.fi = NULL;
if (oldflp-oif

[PATCH 2/6 net-2.6.25] [NETNS] Add netns parameter to inetdev_by_index.

2008-01-21 Thread Denis V. Lunev
Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
---
 include/linux/inetdevice.h |2 +-
 net/ipv4/devinet.c |6 +++---
 net/ipv4/fib_semantics.c   |2 +-
 net/ipv4/igmp.c|4 ++--
 net/ipv4/ip_gre.c  |3 ++-
 5 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 45f3731..e74a2ee 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -133,7 +133,7 @@ extern struct net_device*ip_dev_find(__be32 addr);
 extern int inet_addr_onlink(struct in_device *in_dev, __be32 a, 
__be32 b);
 extern int devinet_ioctl(unsigned int cmd, void __user *);
 extern voiddevinet_init(void);
-extern struct in_device*inetdev_by_index(int);
+extern struct in_device*inetdev_by_index(struct net *, int);
 extern __be32  inet_select_addr(const struct net_device *dev, __be32 
dst, int scope);
 extern __be32  inet_confirm_addr(struct in_device *in_dev, __be32 dst, 
__be32 local, int scope);
 extern struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 
prefix, __be32 mask);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index e381edb..21f71bf 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -409,12 +409,12 @@ static int inet_set_ifa(struct net_device *dev, struct 
in_ifaddr *ifa)
return inet_insert_ifa(ifa);
 }
 
-struct in_device *inetdev_by_index(int ifindex)
+struct in_device *inetdev_by_index(struct net *net, int ifindex)
 {
struct net_device *dev;
struct in_device *in_dev = NULL;
read_lock(dev_base_lock);
-   dev = __dev_get_by_index(init_net, ifindex);
+   dev = __dev_get_by_index(net, ifindex);
if (dev)
in_dev = in_dev_get(dev);
read_unlock(dev_base_lock);
@@ -454,7 +454,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct 
nlmsghdr *nlh, void *arg
goto errout;
 
ifm = nlmsg_data(nlh);
-   in_dev = inetdev_by_index(ifm-ifa_index);
+   in_dev = inetdev_by_index(net, ifm-ifa_index);
if (in_dev == NULL) {
err = -ENODEV;
goto errout;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index ecd91c6..8b47e11 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -583,7 +583,7 @@ out:
if (nh-nh_flags(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
return -EINVAL;
 
-   in_dev = inetdev_by_index(nh-nh_oif);
+   in_dev = inetdev_by_index(init_net, nh-nh_oif);
if (in_dev == NULL)
return -ENODEV;
if (!(in_dev-dev-flagsIFF_UP)) {
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 285d262..b4df39a 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1389,7 +1389,7 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn 
*imr)
struct in_device *idev = NULL;
 
if (imr-imr_ifindex) {
-   idev = inetdev_by_index(imr-imr_ifindex);
+   idev = inetdev_by_index(init_net, imr-imr_ifindex);
if (idev)
__in_dev_put(idev);
return idev;
@@ -,7 +,7 @@ void ip_mc_drop_socket(struct sock *sk)
struct in_device *in_dev;
inet-mc_list = iml-next;
 
-   in_dev = inetdev_by_index(iml-multi.imr_ifindex);
+   in_dev = inetdev_by_index(init_net, iml-multi.imr_ifindex);
(void) ip_mc_leave_src(sk, iml, in_dev);
if (in_dev != NULL) {
ip_mc_dec_group(in_dev, 
iml-multi.imr_multiaddr.s_addr);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 8b81deb..a74983d 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1193,7 +1193,8 @@ static int ipgre_close(struct net_device *dev)
 {
struct ip_tunnel *t = netdev_priv(dev);
if (ipv4_is_multicast(t-parms.iph.daddr)  t-mlink) {
-   struct in_device *in_dev = inetdev_by_index(t-mlink);
+   struct in_device *in_dev;
+   in_dev = inetdev_by_index(dev-nd_net, t-mlink);
if (in_dev) {
ip_mc_dec_group(in_dev, t-parms.iph.daddr);
in_dev_put(in_dev);
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH ] [NETNS 3/4 net-2.6.25] Consolidate kernel netlink socket destruction.

2008-01-18 Thread Denis V. Lunev
Create a specific helper for netlink kernel socket disposal. This just
makes the code look better and provides a basis for proper disposal inside
a namespace.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
Tested-by: Alexey Dobriyan [EMAIL PROTECTED]
---
 drivers/connector/connector.c   |9 +++--
 drivers/scsi/scsi_netlink.c |2 +-
 drivers/scsi/scsi_transport_iscsi.c |2 +-
 fs/ecryptfs/netlink.c   |3 +--
 include/linux/netlink.h |1 +
 net/bridge/netfilter/ebt_ulog.c |4 ++--
 net/core/rtnetlink.c|2 +-
 net/decnet/netfilter/dn_rtmsg.c |4 ++--
 net/ipv4/fib_frontend.c |2 +-
 net/ipv4/inet_diag.c|2 +-
 net/ipv4/netfilter/ip_queue.c   |4 ++--
 net/ipv4/netfilter/ipt_ULOG.c   |4 ++--
 net/ipv6/netfilter/ip6_queue.c  |4 ++--
 net/netfilter/nfnetlink.c   |2 +-
 net/netlink/af_netlink.c|   11 +++
 net/xfrm/xfrm_user.c|2 +-
 16 files changed, 33 insertions(+), 25 deletions(-)

diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index 37976dc..fea2d3e 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -420,8 +420,7 @@ static int __devinit cn_init(void)
 
dev-cbdev = cn_queue_alloc_dev(cqueue, dev-nls);
if (!dev-cbdev) {
-   if (dev-nls-sk_socket)
-   sock_release(dev-nls-sk_socket);
+   netlink_kernel_release(dev-nls);
return -EINVAL;
}

@@ -431,8 +430,7 @@ static int __devinit cn_init(void)
if (err) {
cn_already_initialized = 0;
cn_queue_free_dev(dev-cbdev);
-   if (dev-nls-sk_socket)
-   sock_release(dev-nls-sk_socket);
+   netlink_kernel_release(dev-nls);
return -EINVAL;
}
 
@@ -447,8 +445,7 @@ static void __devexit cn_fini(void)
 
cn_del_callback(dev-id);
cn_queue_free_dev(dev-cbdev);
-   if (dev-nls-sk_socket)
-   sock_release(dev-nls-sk_socket);
+   netlink_kernel_release(dev-nls);
 }
 
 subsys_initcall(cn_init);
diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c
index 40579ed..fe48c24 100644
--- a/drivers/scsi/scsi_netlink.c
+++ b/drivers/scsi/scsi_netlink.c
@@ -169,7 +169,7 @@ void
 scsi_netlink_exit(void)
 {
if (scsi_nl_sock) {
-   sock_release(scsi_nl_sock-sk_socket);
+   netlink_kernel_release(scsi_nl_sock);
netlink_unregister_notifier(scsi_netlink_notifier);
}
 
diff --git a/drivers/scsi/scsi_transport_iscsi.c 
b/drivers/scsi/scsi_transport_iscsi.c
index 5428d15..9e463a6 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -1533,7 +1533,7 @@ unregister_transport_class:
 
 static void __exit iscsi_transport_exit(void)
 {
-   sock_release(nls-sk_socket);
+   netlink_kernel_release(nls);
transport_class_unregister(iscsi_connection_class);
transport_class_unregister(iscsi_session_class);
transport_class_unregister(iscsi_host_class);
diff --git a/fs/ecryptfs/netlink.c b/fs/ecryptfs/netlink.c
index 9aa3451..f638a69 100644
--- a/fs/ecryptfs/netlink.c
+++ b/fs/ecryptfs/netlink.c
@@ -237,7 +237,6 @@ out:
  */
 void ecryptfs_release_netlink(void)
 {
-   if (ecryptfs_nl_sock  ecryptfs_nl_sock-sk_socket)
-   sock_release(ecryptfs_nl_sock-sk_socket);
+   netlink_kernel_release(ecryptfs_nl_sock);
ecryptfs_nl_sock = NULL;
 }
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 2aee0f5..bd13b6f 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -178,6 +178,7 @@ extern struct sock *netlink_kernel_create(struct net *net,
  void (*input)(struct sk_buff *skb),
  struct mutex *cb_mutex,
  struct module *module);
+extern void netlink_kernel_release(struct sock *sk);
 extern int netlink_change_ngroups(struct sock *sk, unsigned int groups);
 extern void netlink_clear_multicast_users(struct sock *sk, unsigned int group);
 extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err);
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index b73ba28..8e7b00b 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -307,7 +307,7 @@ static int __init ebt_ulog_init(void)
if (!ebtulognl)
ret = -ENOMEM;
else if ((ret = ebt_register_watcher(ulog)))
-   sock_release(ebtulognl-sk_socket);
+   netlink_kernel_release(ebtulognl);
 
if (ret == 0)
nf_log_register(PF_BRIDGE, ebt_ulog_logger);
@@ -333,7 +333,7 @@ static void __exit ebt_ulog_fini(void

[PATCH] [NETNS 4/4 net-2.6.25] Namespace stop vs 'ip r l' race.

2008-01-18 Thread Denis V. Lunev
During the network namespace stop process, kernel-side netlink sockets
belonging to a namespace should be closed. They should not prevent the
namespace from stopping, so they do not increment the namespace usage
counter. However, this counter will be put during the last sock_put.

The replacement of the correct netns for init_ns solves the problem only
partially, as the socket to be stopped is, until the proper stop, a valid
netlink kernel socket and can be looked up by user processes. This is not a
problem until it resides in initial namespace (no processes inside this net),
but this is not true for init_net.

So, hold a reference to the socket, remove it from the lookup tables, and only
after that change the namespace and perform the last put.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
Tested-by: Alexey Dobriyan [EMAIL PROTECTED]
---
 net/core/rtnetlink.c |   15 ++-
 net/ipv4/fib_frontend.c  |7 +--
 net/netlink/af_netlink.c |   15 +++
 3 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2ef9480..aafc34d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1365,25 +1365,14 @@ static int rtnetlink_net_init(struct net *net)
   rtnetlink_rcv, rtnl_mutex, THIS_MODULE);
if (!sk)
return -ENOMEM;
-
-   /* Don't hold an extra reference on the namespace */
-   put_net(sk-sk_net);
net-rtnl = sk;
return 0;
 }
 
 static void rtnetlink_net_exit(struct net *net)
 {
-   struct sock *sk = net-rtnl;
-   if (sk) {
-   /* At the last minute lie and say this is a socket for the
-* initial network namespace.  So the socket will be safe to
-* free.
-*/
-   sk-sk_net = get_net(init_net);
-   netlink_kernel_release(net-rtnl);
-   net-rtnl = NULL;
-   }
+   netlink_kernel_release(net-rtnl);
+   net-rtnl = NULL;
 }
 
 static struct pernet_operations rtnetlink_net_ops = {
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index e787d21..62bd791 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -869,19 +869,14 @@ static int nl_fib_lookup_init(struct net *net)
   nl_fib_input, NULL, THIS_MODULE);
if (sk == NULL)
return -EAFNOSUPPORT;
-   /* Don't hold an extra reference on the namespace */
-   put_net(sk-sk_net);
net-ipv4.fibnl = sk;
return 0;
 }
 
 static void nl_fib_lookup_exit(struct net *net)
 {
-   /* At the last minute lie and say this is a socket for the
-* initial network namespace. So the socket will  be safe to free.
-*/
-   net-ipv4.fibnl-sk_net = get_net(init_net);
netlink_kernel_release(net-ipv4.fibnl);
+   net-ipv4.fibnl = NULL;
 }
 
 static void fib_disable_ip(struct net_device *dev, int force)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 626a582..6b178e1 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1396,6 +1396,9 @@ netlink_kernel_create(struct net *net, int unit, unsigned 
int groups,
}
netlink_table_ungrab();
 
+   /* Do not hold an extra referrence to a namespace as this socket is
+* internal to a namespace and does not prevent it to stop. */
+   put_net(net);
return sk;
 
 out_sock_release:
@@ -1411,7 +1414,19 @@ netlink_kernel_release(struct sock *sk)
 {
if (sk == NULL || sk-sk_socket == NULL)
return;
+
+   /*
+* Last sock_put should drop referrence to sk-sk_net. It has already
+* been dropped in netlink_kernel_create. Taking referrence to stopping
+* namespace is not an option.
+* Take referrence to a socket to remove it from netlink lookup table
+* _alive_ and after that destroy it in the context of init_net.
+*/
+   sock_hold(sk);
sock_release(sk-sk_socket);
+
+   sk-sk_net = get_net(init_net);
+   sock_put(sk);
 }
 EXPORT_SYMBOL(netlink_kernel_release);
 
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] [NETNS 2/4 net-2.6.25] Memory leak on network namespace stop.

2008-01-18 Thread Denis V. Lunev
Network namespace allocates 2 kernel netlink sockets, fibnl & rtnl. These
sockets should be disposed of properly, i.e. by sock_release. A plain sock_put
is not enough.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
Tested-by: Alexey Dobriyan [EMAIL PROTECTED]
---
 net/core/rtnetlink.c|2 +-
 net/ipv4/fib_frontend.c |2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4a07e83..2c1f665 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1381,7 +1381,7 @@ static void rtnetlink_net_exit(struct net *net)
 * free.
 */
sk-sk_net = get_net(init_net);
-   sock_put(sk);
+   sock_release(net-rtnl-sk_socket);
net-rtnl = NULL;
}
 }
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 8ddcd3f..4e5216e 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -881,7 +881,7 @@ static void nl_fib_lookup_exit(struct net *net)
 * initial network namespace. So the socket will  be safe to free.
 */
net-ipv4.fibnl-sk_net = get_net(init_net);
-   sock_put(net-ipv4.fibnl);
+   sock_release(net-ipv4.fibnl-sk_socket);
 }
 
 static void fib_disable_ip(struct net_device *dev, int force)
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/4 net-2.6.25] Proper netlink kernel sockets disposal.

2008-01-18 Thread Denis V. Lunev
Alexey Dobriyan found that virtualized netlink kernel sockets (fibnl &
rtnl) are leaked during a namespace start/stop loop.

The leak fix (simple and obvious) reveals that netlink kernel socket
disposal leads to OOPSes:
- nl_table[protocol]->listeners is double freed
- sometimes during namespace stop netlink_sock_destruct
  BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); is hit

This set addresses all these issues.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
Tested-by: Alexey Dobriyan [EMAIL PROTECTED]

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] [NETNS 1/4 net-2.6.25] Double free in netlink_release.

2008-01-18 Thread Denis V. Lunev
Netlink protocol table is global for all namespaces. Some netlink protocols
have been virtualized, i.e. they have a per-namespace netlink socket. This
difference can easily lead to a double free if more than 1 namespace is
started. Count the number of kernel netlink sockets to track when this
table is no longer used.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
Tested-by: Alexey Dobriyan [EMAIL PROTECTED]
---
 net/netlink/af_netlink.c |   10 +++---
 1 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 21f9e30..29fef55 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -498,9 +498,12 @@ static int netlink_release(struct socket *sock)
 
netlink_table_grab();
if (netlink_is_kernel(sk)) {
-   kfree(nl_table[sk-sk_protocol].listeners);
-   nl_table[sk-sk_protocol].module = NULL;
-   nl_table[sk-sk_protocol].registered = 0;
+   BUG_ON(nl_table[sk-sk_protocol].registered == 0);
+   if (--nl_table[sk-sk_protocol].registered == 0) {
+   kfree(nl_table[sk-sk_protocol].listeners);
+   nl_table[sk-sk_protocol].module = NULL;
+   nl_table[sk-sk_protocol].registered = 0;
+   }
} else if (nlk-subscriptions)
netlink_update_listeners(sk);
netlink_table_ungrab();
@@ -1389,6 +1392,7 @@ netlink_kernel_create(struct net *net, int unit, unsigned 
int groups,
nl_table[unit].registered = 1;
} else {
kfree(listeners);
+   nl_table[unit].registered++;
}
netlink_table_ungrab();
 
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/3 net-2.6.25] call FIB rule-action in the correct namespace

2008-01-17 Thread Denis V. Lunev
FIB rule-action should operate in the same namespace as fib_lookup.
This is definitely missed right now.

There are two ways to implement this: pass struct net into another rules
API call (2 levels) or place netns into rule struct directly. The second
approach seems better as the code will grow less.

Additionally, the patchset cleans up struct net from
fib_rules_register/unregister to have network namespace context at the
time of default rules creation.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/3 net-2.6.25] Process FIB rule action in the context of the namespace.

2008-01-17 Thread Denis V. Lunev
Save namespace context on the fib rule at the rule creation time and call
routing lookup in the correct namespace.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
Acked-by: Daniel Lezcano [EMAIL PROTECTED]
---
 include/net/fib_rules.h |1 +
 net/core/fib_rules.c|2 ++
 net/ipv4/fib_rules.c|2 +-
 3 files changed, 4 insertions(+), 1 deletions(-)

diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 7f9f4ae..34349f9 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -22,6 +22,7 @@ struct fib_rule
u32 target;
struct fib_rule *   ctarget;
struct rcu_head rcu;
+   struct net *fr_net;
 };
 
 struct fib_lookup_arg
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 3cd4f13..42ccaf5 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -29,6 +29,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
r-pref = pref;
r-table = table;
r-flags = flags;
+   r-fr_net = ops-fro_net;
 
/* The lock is not required here, the list in unreacheable
 * at the moment this function is called */
@@ -242,6 +243,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct 
nlmsghdr* nlh, void *arg)
err = -ENOMEM;
goto errout;
}
+   rule-fr_net = net;
 
if (tb[FRA_PRIORITY])
rule-pref = nla_get_u32(tb[FRA_PRIORITY]);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 3b7affd..d2001f1 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -91,7 +91,7 @@ static int fib4_rule_action(struct fib_rule *rule, struct 
flowi *flp,
goto errout;
}
 
-   if ((tbl = fib_get_table(init_net, rule-table)) == NULL)
+   if ((tbl = fib_get_table(rule-fr_net, rule-table)) == NULL)
goto errout;
 
err = tbl-tb_lookup(tbl, flp, (struct fib_result *) arg-result);
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/3 net-2.6.25] Add netns to fib_rules_ops.

2008-01-17 Thread Denis V. Lunev
The backward link from FIB rules operations to the network namespace will
allow the API to be simplified a bit.

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
Acked-by: Daniel Lezcano [EMAIL PROTECTED]
---
 include/net/fib_rules.h |1 +
 net/decnet/dn_rules.c   |1 +
 net/ipv4/fib_rules.c|2 ++
 net/ipv6/fib6_rules.c   |1 +
 4 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 4f47250..6910e01 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -67,6 +67,7 @@ struct fib_rules_ops
const struct nla_policy *policy;
struct list_headrules_list;
struct module   *owner;
+   struct net  *fro_net;
 };
 
 #define FRA_GENERIC_POLICY \
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index c1fae23..964e658 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -249,6 +249,7 @@ static struct fib_rules_ops dn_fib_rules_ops = {
.policy = dn_fib_rule_policy,
.rules_list = LIST_HEAD_INIT(dn_fib_rules_ops.rules_list),
.owner  = THIS_MODULE,
+   .fro_net= init_net,
 };
 
 void __init dn_fib_rules_init(void)
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 72232ab..8d0ebe7 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -315,6 +315,8 @@ int __net_init fib4_rules_init(struct net *net)
if (ops == NULL)
return -ENOMEM;
INIT_LIST_HEAD(ops-rules_list);
+   ops-fro_net = net;
+
fib_rules_register(net, ops);
 
err = fib_default_rules_init(ops);
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 76437a1..ead5ab2 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -249,6 +249,7 @@ static struct fib_rules_ops fib6_rules_ops = {
.policy = fib6_rule_policy,
.rules_list = LIST_HEAD_INIT(fib6_rules_ops.rules_list),
.owner  = THIS_MODULE,
+   .fro_net= init_net,
 };
 
 static int __init fib6_default_rules_init(void)
-- 
1.5.3.rc5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/3 net-2.6.25] [NETNS] FIB rules API cleanup.

2008-01-17 Thread Denis V. Lunev
Remove struct net from fib_rules_register(unregister)/notify_change paths
and reduce code size a bit.

add/remove: 0/0 grow/shrink: 10/12 up/down: 35/-100 (-65)
function old new   delta
notify_rule_change   273 280  +7
trie_show_stats  471 475  +4
fn_trie_delete   473 477  +4
fib_rules_unregister 144 148  +4
fib4_rule_compare119 123  +4
resize  28422845  +3
fn_trie_select_default   515 518  +3
inet_sk_rebuild_header   836 838  +2
fib_trie_seq_show764 766  +2
__devinet_sysctl_register276 278  +2
fn_trie_lookup  11241123  -1
ip_fib_check_default 133 131  -2
devinet_conf_sysctl  223 221  -2
snmp_fold_field  126 123  -3
fn_trie_insert  20912086  -5
inet_create  876 870  -6
fib4_rules_init  197 191  -6
fib_sync_down452 444  -8
inet_gso_send_check  334 325  -9
fib_create_info 30032991 -12
fib_nl_delrule   568 553 -15
fib_nl_newrule   883 852 -31

Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]
Acked-by: Daniel Lezcano [EMAIL PROTECTED]
---
 include/net/fib_rules.h |4 ++--
 net/core/fib_rules.c|   20 +---
 net/decnet/dn_rules.c   |4 ++--
 net/ipv4/fib_rules.c|6 +++---
 net/ipv6/fib6_rules.c   |4 ++--
 5 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 6910e01..7f9f4ae 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -102,8 +102,8 @@ static inline u32 frh_get_table(struct fib_rule_hdr *frh, 
struct nlattr **nla)
return frh-table;
 }
 
-extern int fib_rules_register(struct net *, struct fib_rules_ops *);
-extern void fib_rules_unregister(struct net *, struct fib_rules_ops *);
+extern int fib_rules_register(struct fib_rules_ops *);
+extern void fib_rules_unregister(struct fib_rules_ops *);
 extern void fib_rules_cleanup_ops(struct fib_rules_ops *);
 
 extern int fib_rules_lookup(struct fib_rules_ops *,
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 541728a..3cd4f13 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -37,8 +37,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
 }
 EXPORT_SYMBOL(fib_default_rule_add);
 
-static void notify_rule_change(struct net *net, int event,
-  struct fib_rule *rule,
+static void notify_rule_change(int event, struct fib_rule *rule,
   struct fib_rules_ops *ops, struct nlmsghdr *nlh,
   u32 pid);
 
@@ -72,10 +71,13 @@ static void flush_route_cache(struct fib_rules_ops *ops)
ops-flush_cache();
 }
 
-int fib_rules_register(struct net *net, struct fib_rules_ops *ops)
+int fib_rules_register(struct fib_rules_ops *ops)
 {
int err = -EEXIST;
struct fib_rules_ops *o;
+   struct net *net;
+
+   net = ops-fro_net;
 
if (ops-rule_size  sizeof(struct fib_rule))
return -EINVAL;
@@ -112,8 +114,9 @@ void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
 }
 EXPORT_SYMBOL_GPL(fib_rules_cleanup_ops);
 
-void fib_rules_unregister(struct net *net, struct fib_rules_ops *ops)
+void fib_rules_unregister(struct fib_rules_ops *ops)
 {
+   struct net *net = ops-fro_net;
 
spin_lock(net-rules_mod_lock);
list_del_rcu(ops-list);
@@ -333,7 +336,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct 
nlmsghdr* nlh, void *arg)
else
list_add_rcu(rule-list, ops-rules_list);
 
-   notify_rule_change(net, RTM_NEWRULE, rule, ops, nlh, 
NETLINK_CB(skb).pid);
+   notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
flush_route_cache(ops);
rules_ops_put(ops);
return 0;
@@ -423,7 +426,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct 
nlmsghdr* nlh, void *arg)
}
 
synchronize_rcu();
-   notify_rule_change(net, RTM_DELRULE, rule, ops, nlh,
+   notify_rule_change(RTM_DELRULE, rule, ops, nlh,
   NETLINK_CB(skb).pid);
fib_rule_put(rule);
flush_route_cache(ops);
@@ -561,13 +564,15 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct 
netlink_callback *cb

  1   2   3   >