[PATCH net v3 1/1] net: tcp: Permit user set TCP_MAXSEG to default value

2017-03-20 Thread fgao
From: Gao Feng 

When user_mss is zero, it means the default value is used. But the current
code doesn't permit the user to set TCP_MAXSEG to the default value:
it returns -EINVAL when val is zero.

Signed-off-by: Gao Feng 
---
 v3: Correct the logic error, per Neal
 v2: Make codes more clearer, per Eric
 v1: initial version

 net/ipv4/tcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1e319a5..4f7f163 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2470,7 +2470,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
/* Values greater than interface MTU won't take effect. However
 * at the point when this call is done we typically don't yet
 * know which interface is going to be used */
-   if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) {
+   if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW)) {
err = -EINVAL;
break;
}
-- 
1.9.1




[PATCH net v2 1/1] net: tcp: Permit user set TCP_MAXSEG to default value

2017-03-20 Thread fgao
From: Gao Feng 

When user_mss is zero, it means the default value is used. But the current
code doesn't permit the user to set TCP_MAXSEG to the default value:
it returns -EINVAL when val is zero.

Signed-off-by: Gao Feng 
---
 v2: Make codes more clearer, per Eric
 v1: initial version

 net/ipv4/tcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1e319a5..4f7f163 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2470,7 +2470,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
/* Values greater than interface MTU won't take effect. However
 * at the point when this call is done we typically don't yet
 * know which interface is going to be used */
-   if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) {
+   if (!val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW)) {
err = -EINVAL;
break;
}
-- 
1.9.1




[PATCH nf v3 1/1] netfilter: snmp: Fix one possible panic when snmp_trap_helper fail to register

2017-03-20 Thread fgao
From: Gao Feng 

In commit 93557f53e1fb ("netfilter: nf_conntrack: nf_conntrack snmp
helper"), the snmp_helper was replaced by nf_nat_snmp_hook, so the
snmp_helper is never registered. But the code still tries to unregister
the snmp_helper, which could cause a panic.

Now remove the useless snmp_helper and the unregister call in the
error handler.

Fixes: 93557f53e1fb ("netfilter: nf_conntrack: nf_conntrack snmp helper")

Signed-off-by: Gao Feng 
---
 v3: Remove the angle brackets in description, per Sergei
 v2: Add the SHA1 ID in the description, per Sergei
 v1: Initial version

 net/ipv4/netfilter/nf_nat_snmp_basic.c | 14 +-
 1 file changed, 1 insertion(+), 13 deletions(-)

diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c 
b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index c9b52c3..5787364 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1260,16 +1260,6 @@ static int help(struct sk_buff *skb, unsigned int 
protoff,
.timeout= 180,
 };
 
-static struct nf_conntrack_helper snmp_helper __read_mostly = {
-   .me = THIS_MODULE,
-   .help   = help,
-   .expect_policy  = _exp_policy,
-   .name   = "snmp",
-   .tuple.src.l3num= AF_INET,
-   .tuple.src.u.udp.port   = cpu_to_be16(SNMP_PORT),
-   .tuple.dst.protonum = IPPROTO_UDP,
-};
-
 static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
.me = THIS_MODULE,
.help   = help,
@@ -1294,10 +1284,8 @@ static int __init nf_nat_snmp_basic_init(void)
RCU_INIT_POINTER(nf_nat_snmp_hook, help);
 
ret = nf_conntrack_helper_register(_trap_helper);
-   if (ret < 0) {
-   nf_conntrack_helper_unregister(_helper);
+   if (ret < 0)
return ret;
-   }
return ret;
 }
 
-- 
1.9.1




[PATCH net 1/1] net: tcp: Permit user set TCP_MAXSEG to default value

2017-03-20 Thread fgao
From: Gao Feng 

When user_mss is zero, it means the default value is used. But the current
code doesn't permit the user to set TCP_MAXSEG to the default value:
it returns -EINVAL when val is zero.

Signed-off-by: Gao Feng 
---
 net/ipv4/tcp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1e319a5..dd5e8e2 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2470,7 +2470,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
/* Values greater than interface MTU won't take effect. However
 * at the point when this call is done we typically don't yet
 * know which interface is going to be used */
-   if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) {
+   if (val < 0 || (val > 0 && val < TCP_MIN_MSS) ||
+   val > MAX_TCP_WINDOW) {
err = -EINVAL;
break;
}
-- 
1.9.1




[PATCH nf v2 1/1] netfilter: snmp: Fix one possible panic when snmp_trap_helper fail to register

2017-03-20 Thread fgao
From: Gao Feng 

In commit <93557f53e1fb> ("netfilter: nf_conntrack: nf_conntrack snmp
helper"), the snmp_helper was replaced by nf_nat_snmp_hook, so the
snmp_helper is never registered. But the code still tries to unregister
the snmp_helper, which could cause a panic.

Now remove the useless snmp_helper and the unregister call in the
error handler.

Fixes: 93557f53e1fb ("netfilter: nf_conntrack: nf_conntrack snmp helper")

Signed-off-by: Gao Feng 
---
 v2: Add the SHA1 ID in the description, per Sergei
 v1: Initial version

 net/ipv4/netfilter/nf_nat_snmp_basic.c | 14 +-
 1 file changed, 1 insertion(+), 13 deletions(-)

diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c 
b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index c9b52c3..5787364 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1260,16 +1260,6 @@ static int help(struct sk_buff *skb, unsigned int 
protoff,
.timeout= 180,
 };
 
-static struct nf_conntrack_helper snmp_helper __read_mostly = {
-   .me = THIS_MODULE,
-   .help   = help,
-   .expect_policy  = _exp_policy,
-   .name   = "snmp",
-   .tuple.src.l3num= AF_INET,
-   .tuple.src.u.udp.port   = cpu_to_be16(SNMP_PORT),
-   .tuple.dst.protonum = IPPROTO_UDP,
-};
-
 static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
.me = THIS_MODULE,
.help   = help,
@@ -1294,10 +1284,8 @@ static int __init nf_nat_snmp_basic_init(void)
RCU_INIT_POINTER(nf_nat_snmp_hook, help);
 
ret = nf_conntrack_helper_register(_trap_helper);
-   if (ret < 0) {
-   nf_conntrack_helper_unregister(_helper);
+   if (ret < 0)
return ret;
-   }
return ret;
 }
 
-- 
1.9.1




[PATCH nf 1/1] netfilter: snmp: Fix one possible panic when snmp_trap_helper fail to register

2017-03-19 Thread fgao
From: Gao Feng 

In the commit ("netfilter: nf_conntrack: nf_conntrack snmp helper"),
the snmp_helper was replaced by nf_nat_snmp_hook, so the snmp_helper
is never registered. But the code still tries to unregister the
snmp_helper, which could cause a panic.

Now remove the useless snmp_helper and the unregister call in the
error handler.

Fixes: 93557f53e1fb ("netfilter: nf_conntrack: nf_conntrack snmp helper")

Signed-off-by: Gao Feng 
---
 net/ipv4/netfilter/nf_nat_snmp_basic.c | 14 +-
 1 file changed, 1 insertion(+), 13 deletions(-)

diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c 
b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index c9b52c3..5787364 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1260,16 +1260,6 @@ static int help(struct sk_buff *skb, unsigned int 
protoff,
.timeout= 180,
 };
 
-static struct nf_conntrack_helper snmp_helper __read_mostly = {
-   .me = THIS_MODULE,
-   .help   = help,
-   .expect_policy  = _exp_policy,
-   .name   = "snmp",
-   .tuple.src.l3num= AF_INET,
-   .tuple.src.u.udp.port   = cpu_to_be16(SNMP_PORT),
-   .tuple.dst.protonum = IPPROTO_UDP,
-};
-
 static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
.me = THIS_MODULE,
.help   = help,
@@ -1294,10 +1284,8 @@ static int __init nf_nat_snmp_basic_init(void)
RCU_INIT_POINTER(nf_nat_snmp_hook, help);
 
ret = nf_conntrack_helper_register(_trap_helper);
-   if (ret < 0) {
-   nf_conntrack_helper_unregister(_helper);
+   if (ret < 0)
return ret;
-   }
return ret;
 }
 
-- 
1.9.1




[PATCH v4 net-next 1/1] net: Eliminate duplicated codes by creating one new function in_dev_select_addr

2017-03-09 Thread fgao
From: Gao Feng 

There are two duplicated loops which are used to select the right
address in the current code. Now eliminate the duplication by creating
one new function, in_dev_select_addr.

Signed-off-by: Gao Feng 
---
 v4: Drop the first patch in series, per David Ahern
 v3: Add the cover letter, per David
 v2: Correct the commit log and remove useless braces, per Sergei
 v1: Initial Version

 net/ipv4/devinet.c | 32 ++--
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 5d367b7..6bdcd21 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1191,6 +1191,18 @@ static int inet_gifconf(struct net_device *dev, char 
__user *buf, int len)
return done;
 }
 
+static __be32 in_dev_select_addr(const struct in_device *in_dev,
+int scope)
+{
+   for_primary_ifa(in_dev) {
+   if (ifa->ifa_scope != RT_SCOPE_LINK &&
+   ifa->ifa_scope <= scope)
+   return ifa->ifa_local;
+   } endfor_ifa(in_dev);
+
+   return 0;
+}
+
 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
 {
__be32 addr = 0;
@@ -1227,13 +1239,9 @@ __be32 inet_select_addr(const struct net_device *dev, 
__be32 dst, int scope)
if (master_idx &&
(dev = dev_get_by_index_rcu(net, master_idx)) &&
(in_dev = __in_dev_get_rcu(dev))) {
-   for_primary_ifa(in_dev) {
-   if (ifa->ifa_scope != RT_SCOPE_LINK &&
-   ifa->ifa_scope <= scope) {
-   addr = ifa->ifa_local;
-   goto out_unlock;
-   }
-   } endfor_ifa(in_dev);
+   addr = in_dev_select_addr(in_dev, scope);
+   if (addr)
+   goto out_unlock;
}
 
/* Not loopback addresses on loopback should be preferred
@@ -1248,13 +1256,9 @@ __be32 inet_select_addr(const struct net_device *dev, 
__be32 dst, int scope)
if (!in_dev)
continue;
 
-   for_primary_ifa(in_dev) {
-   if (ifa->ifa_scope != RT_SCOPE_LINK &&
-   ifa->ifa_scope <= scope) {
-   addr = ifa->ifa_local;
-   goto out_unlock;
-   }
-   } endfor_ifa(in_dev);
+   addr = in_dev_select_addr(in_dev, scope);
+   if (addr)
+   goto out_unlock;
}
 out_unlock:
rcu_read_unlock();
-- 
1.9.1




[PATCH v3 net-next 2/2] net: Eliminate duplicated codes by creating one new function in_dev_select_addr

2017-03-09 Thread fgao
From: Gao Feng 

There are two duplicated loops which are used to select the right
address in the current code. Now eliminate the duplication by creating
one new function, in_dev_select_addr.

Signed-off-by: Gao Feng 
---
 v3: Add cover letter
 v2: Correct the commit log and remove useless braces, per Sergei
 v1: Initial Version 

 net/ipv4/devinet.c | 33 ++---
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 1a9e550..f82d545 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1191,6 +1191,18 @@ static int inet_gifconf(struct net_device *dev, char 
__user *buf, int len)
return done;
 }
 
+static __be32 in_dev_select_addr(const struct in_device *in_dev,
+int scope)
+{
+   for_primary_ifa(in_dev) {
+   if (ifa->ifa_scope != RT_SCOPE_LINK &&
+   ifa->ifa_scope <= scope)
+   return ifa->ifa_local;
+   } endfor_ifa(in_dev);
+
+   return 0;
+}
+
 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
 {
__be32 addr = 0;
@@ -1229,13 +1241,9 @@ __be32 inet_select_addr(const struct net_device *dev, 
__be32 dst, int scope)
if (dev) {
in_dev = __in_dev_get_rcu(dev);
if (in_dev) {
-   for_primary_ifa(in_dev) {
-   if (ifa->ifa_scope != RT_SCOPE_LINK &&
-   ifa->ifa_scope <= scope) {
-   addr = ifa->ifa_local;
-   goto out_unlock;
-   }
-   } endfor_ifa(in_dev);
+   addr = in_dev_select_addr(in_dev, scope);
+   if (addr)
+   goto out_unlock;
}
}
 
@@ -1250,14 +1258,9 @@ __be32 inet_select_addr(const struct net_device *dev, 
__be32 dst, int scope)
in_dev = __in_dev_get_rcu(dev);
if (!in_dev)
continue;
-
-   for_primary_ifa(in_dev) {
-   if (ifa->ifa_scope != RT_SCOPE_LINK &&
-   ifa->ifa_scope <= scope) {
-   addr = ifa->ifa_local;
-   goto out_unlock;
-   }
-   } endfor_ifa(in_dev);
+   addr = in_dev_select_addr(in_dev, scope);
+   if (addr)
+   goto out_unlock;
}
}
 out_unlock:
-- 
1.9.1




[PATCH v3 net-next 0/2] net: Refine the inet_select_addr codes

2017-03-09 Thread fgao
From: Gao Feng 

This collection contains two patches used to refine the inet_select_addr codes.


Gao Feng (2):
  net: Avoid unnecessary loop when master_idx is invalid in
inet_select_addr
  net: Eliminate duplicated codes by creating one new function
in_dev_select_addr

 net/ipv4/devinet.c | 71 ++
 1 file changed, 39 insertions(+), 32 deletions(-)

-- 
 v3: Add the cover letter, per David
 v2: Correct the commit log and remove useless braces, per Sergei
 v1: Initial Version

1.9.1




[PATCH v3 net-next 1/2] net: Avoid unnecessary loop when master_idx is invalid in inet_select_addr

2017-03-09 Thread fgao
From: Gao Feng 

When master_idx is invalid, it is zero. It is unnecessary to iterate
all netdevs. Because l3mdev_master_ifindex_rcu(dev) != master_idx must
be true.
Now put this loop into the condition block when master_idx is valid.

Signed-off-by: Gao Feng 
---
 v3: Add the cover letter, per David
 v2: Correct the commit log and remove useless braces, per Sergei
 v1: Initial Version

 net/ipv4/devinet.c | 68 +-
 1 file changed, 36 insertions(+), 32 deletions(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 5d367b7..1a9e550 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1219,42 +1219,46 @@ __be32 inet_select_addr(const struct net_device *dev, 
__be32 dst, int scope)
 no_in_dev:
master_idx = l3mdev_master_ifindex_rcu(dev);
 
-   /* For VRFs, the VRF device takes the place of the loopback device,
-* with addresses on it being preferred.  Note in such cases the
-* loopback device will be among the devices that fail the master_idx
-* equality check in the loop below.
-*/
-   if (master_idx &&
-   (dev = dev_get_by_index_rcu(net, master_idx)) &&
-   (in_dev = __in_dev_get_rcu(dev))) {
-   for_primary_ifa(in_dev) {
-   if (ifa->ifa_scope != RT_SCOPE_LINK &&
-   ifa->ifa_scope <= scope) {
-   addr = ifa->ifa_local;
-   goto out_unlock;
+   if (master_idx) {
+   /* For VRFs, the VRF device takes the place of the loopback 
device,
+* with addresses on it being preferred.  Note in such cases the
+* loopback device will be among the devices that fail the 
master_idx
+* equality check in the loop below.
+*/
+   dev = dev_get_by_index_rcu(net, master_idx);
+   if (dev) {
+   in_dev = __in_dev_get_rcu(dev);
+   if (in_dev) {
+   for_primary_ifa(in_dev) {
+   if (ifa->ifa_scope != RT_SCOPE_LINK &&
+   ifa->ifa_scope <= scope) {
+   addr = ifa->ifa_local;
+   goto out_unlock;
+   }
+   } endfor_ifa(in_dev);
}
-   } endfor_ifa(in_dev);
-   }
+   }
 
-   /* Not loopback addresses on loopback should be preferred
-  in this case. It is important that lo is the first interface
-  in dev_base list.
-*/
-   for_each_netdev_rcu(net, dev) {
-   if (l3mdev_master_ifindex_rcu(dev) != master_idx)
-   continue;
+   /* Not loopback addresses on loopback should be preferred
+  in this case. It is important that lo is the first interface
+  in dev_base list.
+*/
+   for_each_netdev_rcu(net, dev) {
+   if (l3mdev_master_ifindex_rcu(dev) != master_idx)
+   continue;
 
-   in_dev = __in_dev_get_rcu(dev);
-   if (!in_dev)
-   continue;
+   in_dev = __in_dev_get_rcu(dev);
+   if (!in_dev)
+   continue;
 
-   for_primary_ifa(in_dev) {
-   if (ifa->ifa_scope != RT_SCOPE_LINK &&
-   ifa->ifa_scope <= scope) {
-   addr = ifa->ifa_local;
-   goto out_unlock;
-   }
-   } endfor_ifa(in_dev);
+   for_primary_ifa(in_dev) {
+   if (ifa->ifa_scope != RT_SCOPE_LINK &&
+   ifa->ifa_scope <= scope) {
+   addr = ifa->ifa_local;
+   goto out_unlock;
+   }
+   } endfor_ifa(in_dev);
+   }
}
 out_unlock:
rcu_read_unlock();
-- 
1.9.1




[PATCH v2 net-next 2/2] net: Eliminate duplicated codes by creating one new function in_dev_select_addr

2017-03-09 Thread fgao
From: Gao Feng 

There are two duplicated loops which are used to select the right
address in the current code. Now eliminate the duplication by creating
one new function, in_dev_select_addr.

Signed-off-by: Gao Feng 
---
 v2: Correct the commit log and remove useless braces, per Sergei
 v1: Initial Version

 net/ipv4/devinet.c | 33 ++---
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 1a9e550..f82d545 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1191,6 +1191,18 @@ static int inet_gifconf(struct net_device *dev, char 
__user *buf, int len)
return done;
 }
 
+static __be32 in_dev_select_addr(const struct in_device *in_dev,
+int scope)
+{
+   for_primary_ifa(in_dev) {
+   if (ifa->ifa_scope != RT_SCOPE_LINK &&
+   ifa->ifa_scope <= scope)
+   return ifa->ifa_local;
+   } endfor_ifa(in_dev);
+
+   return 0;
+}
+
 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
 {
__be32 addr = 0;
@@ -1229,13 +1241,9 @@ __be32 inet_select_addr(const struct net_device *dev, 
__be32 dst, int scope)
if (dev) {
in_dev = __in_dev_get_rcu(dev);
if (in_dev) {
-   for_primary_ifa(in_dev) {
-   if (ifa->ifa_scope != RT_SCOPE_LINK &&
-   ifa->ifa_scope <= scope) {
-   addr = ifa->ifa_local;
-   goto out_unlock;
-   }
-   } endfor_ifa(in_dev);
+   addr = in_dev_select_addr(in_dev, scope);
+   if (addr)
+   goto out_unlock;
}
}
 
@@ -1250,14 +1258,9 @@ __be32 inet_select_addr(const struct net_device *dev, 
__be32 dst, int scope)
in_dev = __in_dev_get_rcu(dev);
if (!in_dev)
continue;
-
-   for_primary_ifa(in_dev) {
-   if (ifa->ifa_scope != RT_SCOPE_LINK &&
-   ifa->ifa_scope <= scope) {
-   addr = ifa->ifa_local;
-   goto out_unlock;
-   }
-   } endfor_ifa(in_dev);
+   addr = in_dev_select_addr(in_dev, scope);
+   if (addr)
+   goto out_unlock;
}
}
 out_unlock:
-- 
1.9.1




[PATCH net-next 1/2] net: Avoid unnecessary loop when master_idx is invalid in inet_select_addr

2017-03-07 Thread fgao
From: Gao Feng 

When master_idx is invalid, it is zero. It is unnecessary to iterate
all netdevs. Because l3mdev_master_ifindex_rcu(dev) != master_idx must
be true.
Now put this loop into the condition block when master_idx is valid.

Signed-off-by: Gao Feng 
---
 net/ipv4/devinet.c | 68 +-
 1 file changed, 36 insertions(+), 32 deletions(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 5d367b7..1a9e550 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1219,42 +1219,46 @@ __be32 inet_select_addr(const struct net_device *dev, 
__be32 dst, int scope)
 no_in_dev:
master_idx = l3mdev_master_ifindex_rcu(dev);
 
-   /* For VRFs, the VRF device takes the place of the loopback device,
-* with addresses on it being preferred.  Note in such cases the
-* loopback device will be among the devices that fail the master_idx
-* equality check in the loop below.
-*/
-   if (master_idx &&
-   (dev = dev_get_by_index_rcu(net, master_idx)) &&
-   (in_dev = __in_dev_get_rcu(dev))) {
-   for_primary_ifa(in_dev) {
-   if (ifa->ifa_scope != RT_SCOPE_LINK &&
-   ifa->ifa_scope <= scope) {
-   addr = ifa->ifa_local;
-   goto out_unlock;
+   if (master_idx) {
+   /* For VRFs, the VRF device takes the place of the loopback 
device,
+* with addresses on it being preferred.  Note in such cases the
+* loopback device will be among the devices that fail the 
master_idx
+* equality check in the loop below.
+*/
+   dev = dev_get_by_index_rcu(net, master_idx);
+   if (dev) {
+   in_dev = __in_dev_get_rcu(dev);
+   if (in_dev) {
+   for_primary_ifa(in_dev) {
+   if (ifa->ifa_scope != RT_SCOPE_LINK &&
+   ifa->ifa_scope <= scope) {
+   addr = ifa->ifa_local;
+   goto out_unlock;
+   }
+   } endfor_ifa(in_dev);
}
-   } endfor_ifa(in_dev);
-   }
+   }
 
-   /* Not loopback addresses on loopback should be preferred
-  in this case. It is important that lo is the first interface
-  in dev_base list.
-*/
-   for_each_netdev_rcu(net, dev) {
-   if (l3mdev_master_ifindex_rcu(dev) != master_idx)
-   continue;
+   /* Not loopback addresses on loopback should be preferred
+  in this case. It is important that lo is the first interface
+  in dev_base list.
+*/
+   for_each_netdev_rcu(net, dev) {
+   if (l3mdev_master_ifindex_rcu(dev) != master_idx)
+   continue;
 
-   in_dev = __in_dev_get_rcu(dev);
-   if (!in_dev)
-   continue;
+   in_dev = __in_dev_get_rcu(dev);
+   if (!in_dev)
+   continue;
 
-   for_primary_ifa(in_dev) {
-   if (ifa->ifa_scope != RT_SCOPE_LINK &&
-   ifa->ifa_scope <= scope) {
-   addr = ifa->ifa_local;
-   goto out_unlock;
-   }
-   } endfor_ifa(in_dev);
+   for_primary_ifa(in_dev) {
+   if (ifa->ifa_scope != RT_SCOPE_LINK &&
+   ifa->ifa_scope <= scope) {
+   addr = ifa->ifa_local;
+   goto out_unlock;
+   }
+   } endfor_ifa(in_dev);
+   }
}
 out_unlock:
rcu_read_unlock();
-- 
1.9.1




[PATCH net-next 2/2] net: Eliminate duplicated codes by creating one new function in_dev_select_addr

2017-03-07 Thread fgao
From: Gao Feng 

There are two duplicated loops which are used to select the right
address in the current code. Now eliminate the duplication by creating
one new function, in_dev_select_addr.

Signed-off-by: Gao Feng 
---
 net/ipv4/devinet.c | 34 +++---
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 1a9e550..d0964c5 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1191,6 +1191,19 @@ static int inet_gifconf(struct net_device *dev, char 
__user *buf, int len)
return done;
 }
 
+static __be32 in_dev_select_addr(const struct in_device *in_dev,
+int scope)
+{
+   for_primary_ifa(in_dev) {
+   if (ifa->ifa_scope != RT_SCOPE_LINK &&
+   ifa->ifa_scope <= scope) {
+   return ifa->ifa_local;
+   }
+   } endfor_ifa(in_dev);
+
+   return 0;
+}
+
 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
 {
__be32 addr = 0;
@@ -1229,13 +1242,9 @@ __be32 inet_select_addr(const struct net_device *dev, 
__be32 dst, int scope)
if (dev) {
in_dev = __in_dev_get_rcu(dev);
if (in_dev) {
-   for_primary_ifa(in_dev) {
-   if (ifa->ifa_scope != RT_SCOPE_LINK &&
-   ifa->ifa_scope <= scope) {
-   addr = ifa->ifa_local;
-   goto out_unlock;
-   }
-   } endfor_ifa(in_dev);
+   addr = in_dev_select_addr(in_dev, scope);
+   if (addr)
+   goto out_unlock;
}
}
 
@@ -1250,14 +1259,9 @@ __be32 inet_select_addr(const struct net_device *dev, 
__be32 dst, int scope)
in_dev = __in_dev_get_rcu(dev);
if (!in_dev)
continue;
-
-   for_primary_ifa(in_dev) {
-   if (ifa->ifa_scope != RT_SCOPE_LINK &&
-   ifa->ifa_scope <= scope) {
-   addr = ifa->ifa_local;
-   goto out_unlock;
-   }
-   } endfor_ifa(in_dev);
+   addr = in_dev_select_addr(in_dev, scope);
+   if (addr)
+   goto out_unlock;
}
}
 out_unlock:
-- 
1.9.1




[PATCH net-next 1/1] decnet: Use TCP nagle macro instead of literal number in decnet

2017-03-04 Thread fgao
From: Gao Feng 

Use the existing TCP Nagle macros TCP_NAGLE_OFF and TCP_NAGLE_CORK instead
of the literal numbers 1 and 2 in the current decnet code.

Signed-off-by: Gao Feng 
---
 net/decnet/af_decnet.c | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index a90ed67..15f1b65 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -132,6 +132,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1468,18 +1469,18 @@ static int __dn_setsockopt(struct socket *sock, int 
level,int optname, char __us
case DSO_NODELAY:
if (optlen != sizeof(int))
return -EINVAL;
-   if (scp->nonagle == 2)
+   if (scp->nonagle == TCP_NAGLE_CORK)
return -EINVAL;
-   scp->nonagle = (u.val == 0) ? 0 : 1;
+   scp->nonagle = (u.val == 0) ? 0 : TCP_NAGLE_OFF;
/* if (scp->nonagle == 1) { Push pending frames } */
break;
 
case DSO_CORK:
if (optlen != sizeof(int))
return -EINVAL;
-   if (scp->nonagle == 1)
+   if (scp->nonagle == TCP_NAGLE_OFF)
return -EINVAL;
-   scp->nonagle = (u.val == 0) ? 0 : 2;
+   scp->nonagle = (u.val == 0) ? 0 : TCP_NAGLE_CORK;
/* if (scp->nonagle == 0) { Push pending frames } */
break;
 
@@ -1607,14 +1608,14 @@ static int __dn_getsockopt(struct socket *sock, int 
level,int optname, char __us
case DSO_NODELAY:
if (r_len > sizeof(int))
r_len = sizeof(int);
-   val = (scp->nonagle == 1);
+   val = (scp->nonagle == TCP_NAGLE_OFF);
r_data = 
break;
 
case DSO_CORK:
if (r_len > sizeof(int))
r_len = sizeof(int);
-   val = (scp->nonagle == 2);
+   val = (scp->nonagle == TCP_NAGLE_CORK);
r_data = 
break;
 
-- 
1.9.1




[PATCH nf 1/1] netfilter: h323,sip: Fix possible dead loop in nat_rtp_rtcp and nf_nat_sdp_media

2017-03-02 Thread fgao
From: Gao Feng 

When h323 and sip try to insert expect nodes, they increase the port
by 2 on each loop iteration, and the loop condition is "port != 0".
So when the start port is an odd number, the 16-bit port wraps around
without ever reaching zero, and the loop may never terminate.

Now make port a u32 instead of u_int16_t, and change the loop condition
to "port <= USHRT_MAX".

Signed-off-by: Gao Feng 
---
 net/ipv4/netfilter/nf_nat_h323.c | 4 ++--
 net/netfilter/nf_nat_sip.c   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 574f7eb..010fc3e 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -183,7 +183,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn 
*ct,
struct nf_ct_h323_master *info = nfct_help_data(ct);
int dir = CTINFO2DIR(ctinfo);
int i;
-   u_int16_t nated_port;
+   u32 nated_port;
 
/* Set expectations for NAT */
rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port;
@@ -218,7 +218,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn 
*ct,
 
/* Try to get a pair of ports. */
for (nated_port = ntohs(rtp_exp->tuple.dst.u.udp.port);
-nated_port != 0; nated_port += 2) {
+nated_port <= USHRT_MAX; nated_port += 2) {
int ret;
 
rtp_exp->tuple.dst.u.udp.port = htons(nated_port);
diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c
index 791fac4..0b24eb3 100644
--- a/net/netfilter/nf_nat_sip.c
+++ b/net/netfilter/nf_nat_sip.c
@@ -548,7 +548,7 @@ static unsigned int nf_nat_sdp_media(struct sk_buff *skb, 
unsigned int protoff,
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = nf_ct_get(skb, );
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-   u_int16_t port;
+   u32 port;
 
/* Connection will come from reply */
if (nf_inet_addr_cmp(>tuplehash[dir].tuple.src.u3,
@@ -571,7 +571,7 @@ static unsigned int nf_nat_sdp_media(struct sk_buff *skb, 
unsigned int protoff,
 
/* Try to get same pair of ports: if not, try to change them. */
for (port = ntohs(rtp_exp->tuple.dst.u.udp.port);
-port != 0; port += 2) {
+port <= USHRT_MAX; port += 2) {
int ret;
 
rtp_exp->tuple.dst.u.udp.port = htons(port);
-- 
1.9.1




[PATCH net-next v2 1/1] net: sock: Use USEC_PER_SEC macro instead of literal 1000000

2017-02-21 Thread fgao
From: Gao Feng 

The USEC_PER_SEC is used once in sock_set_timeout as the max value of
tv_usec. But there is other similar code in this file which uses the
literal 1000000.
It is a minor cleanup to keep the code consistent.

Signed-off-by: Gao Feng 
---
 v2: Use DIV_ROUND_UP, per Joe Perches
 v1: Initial version

 net/core/sock.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/core/sock.c b/net/core/sock.c
index 8b35debf..e2f7abd 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -367,7 +367,7 @@ static int sock_set_timeout(long *timeo_p, char __user 
*optval, int optlen)
if (tv.tv_sec == 0 && tv.tv_usec == 0)
return 0;
if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
-   *timeo_p = tv.tv_sec*HZ + 
(tv.tv_usec+(100/HZ-1))/(100/HZ);
+   *timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP(tv.tv_usec, 
USEC_PER_SEC / HZ);
return 0;
 }
 
@@ -1145,7 +1145,7 @@ int sock_getsockopt(struct socket *sock, int level, int 
optname,
v.tm.tv_usec = 0;
} else {
v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
-   v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 100) / HZ;
+   v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * USEC_PER_SEC) 
/ HZ;
}
break;
 
@@ -1156,7 +1156,7 @@ int sock_getsockopt(struct socket *sock, int level, int 
optname,
v.tm.tv_usec = 0;
} else {
v.tm.tv_sec = sk->sk_sndtimeo / HZ;
-   v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 100) / HZ;
+   v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * USEC_PER_SEC) 
/ HZ;
}
break;
 
-- 
1.9.1




[PATCH net-next 1/1] net: sock: Use USEC_PER_SEC macro instead of literal 1000000

2017-02-20 Thread fgao
From: Gao Feng 

The USEC_PER_SEC is used once in sock_set_timeout as the max value of
tv_usec. But there is other similar code in this file which uses the
literal 1000000.
It is a minor cleanup to keep the code consistent.

Signed-off-by: Gao Feng 
---
 net/core/sock.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/core/sock.c b/net/core/sock.c
index 8b35debf..fa47dd7 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -367,7 +367,7 @@ static int sock_set_timeout(long *timeo_p, char __user 
*optval, int optlen)
if (tv.tv_sec == 0 && tv.tv_usec == 0)
return 0;
if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
-   *timeo_p = tv.tv_sec*HZ + 
(tv.tv_usec+(100/HZ-1))/(100/HZ);
+   *timeo_p = tv.tv_sec * HZ + (tv.tv_usec + (USEC_PER_SEC / HZ - 
1)) / (USEC_PER_SEC / HZ);
return 0;
 }
 
@@ -1145,7 +1145,7 @@ int sock_getsockopt(struct socket *sock, int level, int 
optname,
v.tm.tv_usec = 0;
} else {
v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
-   v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 100) / HZ;
+   v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * USEC_PER_SEC) 
/ HZ;
}
break;
 
@@ -1156,7 +1156,7 @@ int sock_getsockopt(struct socket *sock, int level, int 
optname,
v.tm.tv_usec = 0;
} else {
v.tm.tv_sec = sk->sk_sndtimeo / HZ;
-   v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 100) / HZ;
+   v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * USEC_PER_SEC) 
/ HZ;
}
break;
 
-- 
1.9.1




[PATCH net-next 1/1] pkt_sched: Remove useless qdisc_stab_lock

2017-02-16 Thread fgao
From: Gao Feng 

The qdisc_stab_lock is used in qdisc_get_stab and qdisc_put_stab.
These two functions are invoked in qdisc_create, qdisc_change, and
qdisc_destroy which run fully under RTNL.

So RTNL already ensures that only one caller can access the
qdisc_stab_list at a time, and qdisc_stab_lock is unnecessary.

Signed-off-by: Gao Feng 
---
 net/sched/sch_api.c | 12 
 1 file changed, 12 deletions(-)

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index a13c15e..bcf49cd 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -440,7 +440,6 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab)
 EXPORT_SYMBOL(qdisc_put_rtab);
 
 static LIST_HEAD(qdisc_stab_list);
-static DEFINE_SPINLOCK(qdisc_stab_lock);
 
 static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
[TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
@@ -474,20 +473,15 @@ static struct qdisc_size_table *qdisc_get_stab(struct 
nlattr *opt)
if (tsize != s->tsize || (!tab && tsize > 0))
return ERR_PTR(-EINVAL);
 
-   spin_lock(_stab_lock);
-
list_for_each_entry(stab, _stab_list, list) {
if (memcmp(>szopts, s, sizeof(*s)))
continue;
if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
continue;
stab->refcnt++;
-   spin_unlock(_stab_lock);
return stab;
}
 
-   spin_unlock(_stab_lock);
-
stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
if (!stab)
return ERR_PTR(-ENOMEM);
@@ -497,9 +491,7 @@ static struct qdisc_size_table *qdisc_get_stab(struct 
nlattr *opt)
if (tsize > 0)
memcpy(stab->data, tab, tsize * sizeof(u16));
 
-   spin_lock(_stab_lock);
list_add_tail(>list, _stab_list);
-   spin_unlock(_stab_lock);
 
return stab;
 }
@@ -514,14 +506,10 @@ void qdisc_put_stab(struct qdisc_size_table *tab)
if (!tab)
return;
 
-   spin_lock(_stab_lock);
-
if (--tab->refcnt == 0) {
list_del(>list);
call_rcu_bh(>rcu, stab_kfree_rcu);
}
-
-   spin_unlock(_stab_lock);
 }
 EXPORT_SYMBOL(qdisc_put_stab);
 
-- 
1.9.1




[PATCH net 1/1] net: sock: Use double send/recv buff value to compare with max value

2017-02-08 Thread fgao
From: Gao Feng 

Because the value of SO_SNDBUF and SO_RCVBUF is doubled before
assignment, so the real value of send and recv buffer could be more
than the max sysctl config sysctl_wmem_max and sysctl_rmem_max.

Now use the doubled send/recv buffer value to compare with sysctl_wmem_max
and sysctl_rmem_max, which keeps it consistent with SOCK_MIN_SNDBUF
and SOCK_MIN_RCVBUF.

Signed-off-by: Gao Feng 
---
 net/core/sock.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/core/sock.c b/net/core/sock.c
index 4eca27d..fa40dff 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -712,7 +712,7 @@ int sock_setsockopt(struct socket *sock, int level, int 
optname,
 * play 'guess the biggest size' games. RCVBUF/SNDBUF
 * are treated in BSD as hints
 */
-   val = min_t(u32, val, sysctl_wmem_max);
+   val = min_t(u32, val * 2, sysctl_wmem_max);
 set_sndbuf:
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
@@ -733,7 +733,7 @@ int sock_setsockopt(struct socket *sock, int level, int 
optname,
 * play 'guess the biggest size' games. RCVBUF/SNDBUF
 * are treated in BSD as hints
 */
-   val = min_t(u32, val, sysctl_rmem_max);
+   val = min_t(u32, val * 2, sysctl_rmem_max);
 set_rcvbuf:
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
/*
-- 
1.9.1




[PATCH net-next 1/1] net: ping: Use right format specifier to avoid type casting

2017-01-16 Thread fgao
From: Gao Feng 

The inet_num is u16, so use %hu instead of casting it to int. And
the sk_bound_dev_if is actually an int, so it needn't be cast to int.

Signed-off-by: Gao Feng 
---
 net/ipv4/ping.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 86cca61..592db6a 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -433,9 +433,9 @@ int ping_bind(struct sock *sk, struct sockaddr *uaddr, int 
addr_len)
goto out;
}
 
-   pr_debug("after bind(): num = %d, dif = %d\n",
-(int)isk->inet_num,
-(int)sk->sk_bound_dev_if);
+   pr_debug("after bind(): num = %hu, dif = %d\n",
+isk->inet_num,
+sk->sk_bound_dev_if);
 
err = 0;
if (sk->sk_family == AF_INET && isk->inet_rcv_saddr)
-- 
1.9.1




[PATCH net-next RESEND 1/1] driver: ipvlan: Define common functions to decrease duplicated codes used to add or del IP address

2016-12-28 Thread fgao
From: Gao Feng 

There are some duplicated codes in ipvlan_add_addr6/4 and
ipvlan_del_addr6/4. Now define two common functions ipvlan_add_addr
and ipvlan_del_addr to decrease the duplicated codes.
It could be helpful to maintain the codes.

Signed-off-by: Gao Feng 
---
 It is sent again because the first email is sent during net-next closing.

 drivers/net/ipvlan/ipvlan_main.c | 68 +---
 1 file changed, 29 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 693ec5b..5874d30 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -669,23 +669,22 @@ static int ipvlan_device_event(struct notifier_block 
*unused,
return NOTIFY_DONE;
 }
 
-static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
+static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 {
struct ipvl_addr *addr;
 
-   if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) {
-   netif_err(ipvlan, ifup, ipvlan->dev,
- "Failed to add IPv6=%pI6c addr for %s intf\n",
- ip6_addr, ipvlan->dev->name);
-   return -EINVAL;
-   }
addr = kzalloc(sizeof(struct ipvl_addr), GFP_ATOMIC);
if (!addr)
return -ENOMEM;
 
addr->master = ipvlan;
-   memcpy(>ip6addr, ip6_addr, sizeof(struct in6_addr));
-   addr->atype = IPVL_IPV6;
+   if (is_v6) {
+   memcpy(>ip6addr, iaddr, sizeof(struct in6_addr));
+   addr->atype = IPVL_IPV6;
+   } else {
+   memcpy(>ip4addr, iaddr, sizeof(struct in_addr));
+   addr->atype = IPVL_IPV4;
+   }
list_add_tail(>anode, >addrs);
 
/* If the interface is not up, the address will be added to the hash
@@ -697,11 +696,11 @@ static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, 
struct in6_addr *ip6_addr)
return 0;
 }
 
-static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr 
*ip6_addr)
+static void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 {
struct ipvl_addr *addr;
 
-   addr = ipvlan_find_addr(ipvlan, ip6_addr, true);
+   addr = ipvlan_find_addr(ipvlan, iaddr, is_v6);
if (!addr)
return;
 
@@ -712,6 +711,23 @@ static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, 
struct in6_addr *ip6_addr)
return;
 }
 
+static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
+{
+   if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) {
+   netif_err(ipvlan, ifup, ipvlan->dev,
+ "Failed to add IPv6=%pI6c addr for %s intf\n",
+ ip6_addr, ipvlan->dev->name);
+   return -EINVAL;
+   }
+
+   return ipvlan_add_addr(ipvlan, ip6_addr, true);
+}
+
+static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr 
*ip6_addr)
+{
+   return ipvlan_del_addr(ipvlan, ip6_addr, true);
+}
+
 static int ipvlan_addr6_event(struct notifier_block *unused,
  unsigned long event, void *ptr)
 {
@@ -745,45 +761,19 @@ static int ipvlan_addr6_event(struct notifier_block 
*unused,
 
 static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
 {
-   struct ipvl_addr *addr;
-
if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) {
netif_err(ipvlan, ifup, ipvlan->dev,
  "Failed to add IPv4=%pI4 on %s intf.\n",
  ip4_addr, ipvlan->dev->name);
return -EINVAL;
}
-   addr = kzalloc(sizeof(struct ipvl_addr), GFP_KERNEL);
-   if (!addr)
-   return -ENOMEM;
-
-   addr->master = ipvlan;
-   memcpy(>ip4addr, ip4_addr, sizeof(struct in_addr));
-   addr->atype = IPVL_IPV4;
-   list_add_tail(>anode, >addrs);
-
-   /* If the interface is not up, the address will be added to the hash
-* list by ipvlan_open.
-*/
-   if (netif_running(ipvlan->dev))
-   ipvlan_ht_addr_add(ipvlan, addr);
 
-   return 0;
+   return ipvlan_add_addr(ipvlan, ip4_addr, false);
 }
 
 static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
 {
-   struct ipvl_addr *addr;
-
-   addr = ipvlan_find_addr(ipvlan, ip4_addr, false);
-   if (!addr)
-   return;
-
-   ipvlan_ht_addr_del(addr);
-   list_del(>anode);
-   kfree_rcu(addr, rcu);
-
-   return;
+   return ipvlan_del_addr(ipvlan, ip4_addr, false);
 }
 
 static int ipvlan_addr4_event(struct notifier_block *unused,
-- 
1.9.1




[PATCH net-next RESEND 1/1] driver: ipvlan: Remove unnecessary ipvlan NULL check in ipvlan_count_rx

2016-12-28 Thread fgao
From: Gao Feng 

There are three functions which would invoke the ipvlan_count_rx. They
are ipvlan_process_multicast, ipvlan_rcv_frame, and ipvlan_nf_input.
The former two functions already use the ipvlan directly before
ipvlan_count_rx, and ipvlan_nf_input gets the ipvlan from
ipvl_addr->master, it is not possible to be NULL too.
So the ipvlan pointer check is unnecessary in ipvlan_count_rx.

Signed-off-by: Gao Feng 
---
 It is sent again because the last email is sent during net-next closed

 drivers/net/ipvlan/ipvlan_core.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index b4e9907..082f9f1 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -19,9 +19,6 @@ void ipvlan_init_secret(void)
 static void ipvlan_count_rx(const struct ipvl_dev *ipvlan,
unsigned int len, bool success, bool mcast)
 {
-   if (!ipvlan)
-   return;
-
if (likely(success)) {
struct ipvl_pcpu_stats *pcptr;
 
-- 
1.9.1




[PATCH net-next 1/1] driver: ipvlan: Define common functions to decrease duplicated codes used to add or del IP address

2016-12-18 Thread fgao
From: Gao Feng 

There are some duplicated codes in ipvlan_add_addr6/4 and
ipvlan_del_addr6/4. Now define two common functions ipvlan_add_addr
and ipvlan_del_addr to decrease the duplicated codes.
It could be helpful to maintain the codes.

Signed-off-by: Gao Feng 
---
 It is sent again because the first email is sent during net-next closing.

 drivers/net/ipvlan/ipvlan_main.c | 68 +---
 1 file changed, 29 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 693ec5b..5874d30 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -669,23 +669,22 @@ static int ipvlan_device_event(struct notifier_block 
*unused,
return NOTIFY_DONE;
 }
 
-static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
+static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 {
struct ipvl_addr *addr;
 
-   if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) {
-   netif_err(ipvlan, ifup, ipvlan->dev,
- "Failed to add IPv6=%pI6c addr for %s intf\n",
- ip6_addr, ipvlan->dev->name);
-   return -EINVAL;
-   }
addr = kzalloc(sizeof(struct ipvl_addr), GFP_ATOMIC);
if (!addr)
return -ENOMEM;
 
addr->master = ipvlan;
-   memcpy(>ip6addr, ip6_addr, sizeof(struct in6_addr));
-   addr->atype = IPVL_IPV6;
+   if (is_v6) {
+   memcpy(>ip6addr, iaddr, sizeof(struct in6_addr));
+   addr->atype = IPVL_IPV6;
+   } else {
+   memcpy(>ip4addr, iaddr, sizeof(struct in_addr));
+   addr->atype = IPVL_IPV4;
+   }
list_add_tail(>anode, >addrs);
 
/* If the interface is not up, the address will be added to the hash
@@ -697,11 +696,11 @@ static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, 
struct in6_addr *ip6_addr)
return 0;
 }
 
-static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr 
*ip6_addr)
+static void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 {
struct ipvl_addr *addr;
 
-   addr = ipvlan_find_addr(ipvlan, ip6_addr, true);
+   addr = ipvlan_find_addr(ipvlan, iaddr, is_v6);
if (!addr)
return;
 
@@ -712,6 +711,23 @@ static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, 
struct in6_addr *ip6_addr)
return;
 }
 
+static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
+{
+   if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) {
+   netif_err(ipvlan, ifup, ipvlan->dev,
+ "Failed to add IPv6=%pI6c addr for %s intf\n",
+ ip6_addr, ipvlan->dev->name);
+   return -EINVAL;
+   }
+
+   return ipvlan_add_addr(ipvlan, ip6_addr, true);
+}
+
+static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr 
*ip6_addr)
+{
+   return ipvlan_del_addr(ipvlan, ip6_addr, true);
+}
+
 static int ipvlan_addr6_event(struct notifier_block *unused,
  unsigned long event, void *ptr)
 {
@@ -745,45 +761,19 @@ static int ipvlan_addr6_event(struct notifier_block 
*unused,
 
 static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
 {
-   struct ipvl_addr *addr;
-
if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) {
netif_err(ipvlan, ifup, ipvlan->dev,
  "Failed to add IPv4=%pI4 on %s intf.\n",
  ip4_addr, ipvlan->dev->name);
return -EINVAL;
}
-   addr = kzalloc(sizeof(struct ipvl_addr), GFP_KERNEL);
-   if (!addr)
-   return -ENOMEM;
-
-   addr->master = ipvlan;
-   memcpy(>ip4addr, ip4_addr, sizeof(struct in_addr));
-   addr->atype = IPVL_IPV4;
-   list_add_tail(>anode, >addrs);
-
-   /* If the interface is not up, the address will be added to the hash
-* list by ipvlan_open.
-*/
-   if (netif_running(ipvlan->dev))
-   ipvlan_ht_addr_add(ipvlan, addr);
 
-   return 0;
+   return ipvlan_add_addr(ipvlan, ip4_addr, false);
 }
 
 static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
 {
-   struct ipvl_addr *addr;
-
-   addr = ipvlan_find_addr(ipvlan, ip4_addr, false);
-   if (!addr)
-   return;
-
-   ipvlan_ht_addr_del(addr);
-   list_del(>anode);
-   kfree_rcu(addr, rcu);
-
-   return;
+   return ipvlan_del_addr(ipvlan, ip4_addr, false);
 }
 
 static int ipvlan_addr4_event(struct notifier_block *unused,
-- 
1.9.1




[PATCH net-next 1/1] driver: ipvlan: Remove unnecessary ipvlan NULL check in ipvlan_count_rx

2016-12-18 Thread fgao
From: Gao Feng 

There are three functions which would invoke the ipvlan_count_rx. They
are ipvlan_process_multicast, ipvlan_rcv_frame, and ipvlan_nf_input.
The former two functions already use the ipvlan directly before
ipvlan_count_rx, and ipvlan_nf_input gets the ipvlan from
ipvl_addr->master, it is not possible to be NULL too.
So the ipvlan pointer check is unnecessary in ipvlan_count_rx.

Signed-off-by: Gao Feng 
---
 drivers/net/ipvlan/ipvlan_core.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index b4e9907..082f9f1 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -19,9 +19,6 @@ void ipvlan_init_secret(void)
 static void ipvlan_count_rx(const struct ipvl_dev *ipvlan,
unsigned int len, bool success, bool mcast)
 {
-   if (!ipvlan)
-   return;
-
if (likely(success)) {
struct ipvl_pcpu_stats *pcptr;
 
-- 
1.9.1




[PATCH net-next 1/1] driver: ipvlan: Define common functions to decrease duplicated codes used to add or del IP address

2016-12-14 Thread fgao
From: Gao Feng 

There are some duplicated codes in ipvlan_add_addr6/4 and
ipvlan_del_addr6/4. Now define two common functions ipvlan_add_addr
and ipvlan_del_addr to decrease the duplicated codes.
It could be helpful to maintain the codes.

Signed-off-by: Gao Feng 
---
 drivers/net/ipvlan/ipvlan_main.c | 68 +---
 1 file changed, 29 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 693ec5b..5874d30 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -669,23 +669,22 @@ static int ipvlan_device_event(struct notifier_block 
*unused,
return NOTIFY_DONE;
 }
 
-static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
+static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 {
struct ipvl_addr *addr;
 
-   if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) {
-   netif_err(ipvlan, ifup, ipvlan->dev,
- "Failed to add IPv6=%pI6c addr for %s intf\n",
- ip6_addr, ipvlan->dev->name);
-   return -EINVAL;
-   }
addr = kzalloc(sizeof(struct ipvl_addr), GFP_ATOMIC);
if (!addr)
return -ENOMEM;
 
addr->master = ipvlan;
-   memcpy(>ip6addr, ip6_addr, sizeof(struct in6_addr));
-   addr->atype = IPVL_IPV6;
+   if (is_v6) {
+   memcpy(>ip6addr, iaddr, sizeof(struct in6_addr));
+   addr->atype = IPVL_IPV6;
+   } else {
+   memcpy(>ip4addr, iaddr, sizeof(struct in_addr));
+   addr->atype = IPVL_IPV4;
+   }
list_add_tail(>anode, >addrs);
 
/* If the interface is not up, the address will be added to the hash
@@ -697,11 +696,11 @@ static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, 
struct in6_addr *ip6_addr)
return 0;
 }
 
-static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr 
*ip6_addr)
+static void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 {
struct ipvl_addr *addr;
 
-   addr = ipvlan_find_addr(ipvlan, ip6_addr, true);
+   addr = ipvlan_find_addr(ipvlan, iaddr, is_v6);
if (!addr)
return;
 
@@ -712,6 +711,23 @@ static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, 
struct in6_addr *ip6_addr)
return;
 }
 
+static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
+{
+   if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) {
+   netif_err(ipvlan, ifup, ipvlan->dev,
+ "Failed to add IPv6=%pI6c addr for %s intf\n",
+ ip6_addr, ipvlan->dev->name);
+   return -EINVAL;
+   }
+
+   return ipvlan_add_addr(ipvlan, ip6_addr, true);
+}
+
+static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr 
*ip6_addr)
+{
+   return ipvlan_del_addr(ipvlan, ip6_addr, true);
+}
+
 static int ipvlan_addr6_event(struct notifier_block *unused,
  unsigned long event, void *ptr)
 {
@@ -745,45 +761,19 @@ static int ipvlan_addr6_event(struct notifier_block 
*unused,
 
 static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
 {
-   struct ipvl_addr *addr;
-
if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) {
netif_err(ipvlan, ifup, ipvlan->dev,
  "Failed to add IPv4=%pI4 on %s intf.\n",
  ip4_addr, ipvlan->dev->name);
return -EINVAL;
}
-   addr = kzalloc(sizeof(struct ipvl_addr), GFP_KERNEL);
-   if (!addr)
-   return -ENOMEM;
-
-   addr->master = ipvlan;
-   memcpy(>ip4addr, ip4_addr, sizeof(struct in_addr));
-   addr->atype = IPVL_IPV4;
-   list_add_tail(>anode, >addrs);
-
-   /* If the interface is not up, the address will be added to the hash
-* list by ipvlan_open.
-*/
-   if (netif_running(ipvlan->dev))
-   ipvlan_ht_addr_add(ipvlan, addr);
 
-   return 0;
+   return ipvlan_add_addr(ipvlan, ip4_addr, false);
 }
 
 static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
 {
-   struct ipvl_addr *addr;
-
-   addr = ipvlan_find_addr(ipvlan, ip4_addr, false);
-   if (!addr)
-   return;
-
-   ipvlan_ht_addr_del(addr);
-   list_del(>anode);
-   kfree_rcu(addr, rcu);
-
-   return;
+   return ipvlan_del_addr(ipvlan, ip4_addr, false);
 }
 
 static int ipvlan_addr4_event(struct notifier_block *unused,
-- 
1.9.1




[PATCH net v2 1/1] driver: ipvlan: Unlink the upper dev when ipvlan_link_new failed

2016-12-07 Thread fgao
From: Gao Feng 

When ipvlan_set_port_mode fails in ipvlan_link_new (after
netdev_upper_dev_link has succeeded), the ipvlan dev needs to be
unlinked from the upper dev with netdev_upper_dev_unlink.

Signed-off-by: Gao Feng 
---
 v2: Rename the label to unlink_netdev, per Mahesh Bandewar
 v1: Initial patch

 drivers/net/ipvlan/ipvlan_main.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 0fef178..dfbc4ef 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -546,13 +546,15 @@ static int ipvlan_link_new(struct net *src_net, struct 
net_device *dev,
}
err = ipvlan_set_port_mode(port, mode);
if (err) {
-   goto unregister_netdev;
+   goto unlink_netdev;
}
 
list_add_tail_rcu(>pnode, >ipvlans);
netif_stacked_transfer_operstate(phy_dev, dev);
return 0;
 
+unlink_netdev:
+   netdev_upper_dev_unlink(phy_dev, dev);
 unregister_netdev:
unregister_netdevice(dev);
 destroy_ipvlan_port:
-- 
1.9.1




[PATCH net 1/1] driver: ipvlan: Unlink the upper dev when ipvlan_link_new failed

2016-12-07 Thread fgao
From: Gao Feng 

When ipvlan_set_port_mode fails in ipvlan_link_new (after
netdev_upper_dev_link has succeeded), the ipvlan dev needs to be
unlinked from the upper dev with netdev_upper_dev_unlink.

Signed-off-by: Gao Feng 
---
 drivers/net/ipvlan/ipvlan_main.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 0fef178..189adbc 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -546,13 +546,15 @@ static int ipvlan_link_new(struct net *src_net, struct 
net_device *dev,
}
err = ipvlan_set_port_mode(port, mode);
if (err) {
-   goto unregister_netdev;
+   goto dev_unlink;
}
 
list_add_tail_rcu(>pnode, >ipvlans);
netif_stacked_transfer_operstate(phy_dev, dev);
return 0;
 
+dev_unlink:
+   netdev_upper_dev_unlink(phy_dev, dev);
 unregister_netdev:
unregister_netdevice(dev);
 destroy_ipvlan_port:
-- 
1.9.1




[PATCH net-next 1/1] driver: macvlan: Remove the rcu member of macvlan_port

2016-12-06 Thread fgao
From: Gao Feng 

When free macvlan_port in macvlan_port_destroy, it is safe to free
directly because netdev_rx_handler_unregister could enforce one
grace period.
So it is unnecessary to use kfree_rcu for macvlan_port.

Signed-off-by: Gao Feng 
---
 drivers/net/macvlan.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 3c0a171..20b3fdf2 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -43,7 +43,6 @@ struct macvlan_port {
struct net_device   *dev;
struct hlist_head   vlan_hash[MACVLAN_HASH_SIZE];
struct list_headvlans;
-   struct rcu_head rcu;
struct sk_buff_head bc_queue;
struct work_struct  bc_work;
boolpassthru;
@@ -1151,7 +1150,7 @@ static void macvlan_port_destroy(struct net_device *dev)
cancel_work_sync(>bc_work);
__skb_queue_purge(>bc_queue);
 
-   kfree_rcu(port, rcu);
+   kfree(port);
 }
 
 static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[])
-- 
1.9.1




[PATCH net-next v3 1/1] driver: ipvlan: Free ipvl_port directly with kfree instead of kfree_rcu

2016-12-06 Thread fgao
From: Gao Feng 

There are two functions which would free the ipvl_port now. The first
is ipvlan_port_create. It frees the ipvl_port in the error handler,
so it could kfree it directly. The second is ipvlan_port_destroy. It
invokes netdev_rx_handler_unregister which enforces one grace period
by synchronize_net firstly, so it also could kfree the ipvl_port
directly and safely.

So it is unnecessary to use kfree_rcu to free ipvl_port.

Signed-off-by: Gao Feng 
---
 v3: Add more detail comments
 v2: Remove the rcu of ipvl_port directly
 v1: Initial patch

 drivers/net/ipvlan/ipvlan.h  | 1 -
 drivers/net/ipvlan/ipvlan_main.c | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h
index 05a62d2..031093e 100644
--- a/drivers/net/ipvlan/ipvlan.h
+++ b/drivers/net/ipvlan/ipvlan.h
@@ -97,7 +97,6 @@ struct ipvl_port {
struct work_struct  wq;
struct sk_buff_head backlog;
int count;
-   struct rcu_head rcu;
 };
 
 static inline struct ipvl_port *ipvlan_port_get_rcu(const struct net_device *d)
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index c6aa667..44ceebc 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -128,7 +128,7 @@ static int ipvlan_port_create(struct net_device *dev)
return 0;
 
 err:
-   kfree_rcu(port, rcu);
+   kfree(port);
return err;
 }
 
@@ -145,7 +145,7 @@ static void ipvlan_port_destroy(struct net_device *dev)
netdev_rx_handler_unregister(dev);
cancel_work_sync(>wq);
__skb_queue_purge(>backlog);
-   kfree_rcu(port, rcu);
+   kfree(port);
 }
 
 #define IPVLAN_FEATURES \
-- 
1.9.1




[PATCH net-next v2 1/1] driver: ipvlan: Free ipvl_port directly with kfree instead of kfree_rcu

2016-12-06 Thread fgao
From: Gao Feng 

There is no one which may reference the ipvlan port when free it in
ipvlan_port_create and ipvlan_port_destroy. So it is unnecessary to
use kfree_rcu.

Signed-off-by: Gao Feng 
---
 v2: Remove the rcu of ipvl_port directly
 v1: Initial version

 drivers/net/ipvlan/ipvlan.h  | 1 -
 drivers/net/ipvlan/ipvlan_main.c | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h
index 05a62d2..031093e 100644
--- a/drivers/net/ipvlan/ipvlan.h
+++ b/drivers/net/ipvlan/ipvlan.h
@@ -97,7 +97,6 @@ struct ipvl_port {
struct work_struct  wq;
struct sk_buff_head backlog;
int count;
-   struct rcu_head rcu;
 };
 
 static inline struct ipvl_port *ipvlan_port_get_rcu(const struct net_device *d)
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index c6aa667..44ceebc 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -128,7 +128,7 @@ static int ipvlan_port_create(struct net_device *dev)
return 0;
 
 err:
-   kfree_rcu(port, rcu);
+   kfree(port);
return err;
 }
 
@@ -145,7 +145,7 @@ static void ipvlan_port_destroy(struct net_device *dev)
netdev_rx_handler_unregister(dev);
cancel_work_sync(>wq);
__skb_queue_purge(>backlog);
-   kfree_rcu(port, rcu);
+   kfree(port);
 }
 
 #define IPVLAN_FEATURES \
-- 
1.9.1




[PATCH net-next 1/1] driver: ipvlan: Free the port memory directly with kfree instead of kfree_rcu

2016-12-05 Thread fgao
From: Gao Feng 

There is no one which may reference the "port" in ipvlan_port_create
when netdev_rx_handler_register failed. So it could free it directly
with kfree instead of kfree_rcu.

Signed-off-by: Gao Feng 
---
 drivers/net/ipvlan/ipvlan_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index c6aa667..1a601151 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -128,7 +128,7 @@ static int ipvlan_port_create(struct net_device *dev)
return 0;
 
 err:
-   kfree_rcu(port, rcu);
+   kfree(port);
return err;
 }
 
-- 
1.9.1




[PATCH net-next 1/1] driver: ipvlan: Remove useless member mtu_adj of struct ipvl_dev

2016-11-29 Thread fgao
From: Gao Feng 

The mtu_adj is initialized to zero when the memory is allocated, and
there is no assignment to mtu_adj anywhere. It is only read in
ipvlan_adjust_mtu as an rvalue.
So it is a useless member of struct ipvl_dev; remove it.

Signed-off-by: Gao Feng 
---
 drivers/net/ipvlan/ipvlan.h  | 1 -
 drivers/net/ipvlan/ipvlan_main.c | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h
index 7e0732f..05a62d2 100644
--- a/drivers/net/ipvlan/ipvlan.h
+++ b/drivers/net/ipvlan/ipvlan.h
@@ -73,7 +73,6 @@ struct ipvl_dev {
DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE);
netdev_features_t   sfeatures;
u32 msg_enable;
-   u16 mtu_adj;
 };
 
 struct ipvl_addr {
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index ab90b22..c6aa667 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -32,7 +32,7 @@
 
 static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev)
 {
-   ipvlan->dev->mtu = dev->mtu - ipvlan->mtu_adj;
+   ipvlan->dev->mtu = dev->mtu;
 }
 
 static int ipvlan_register_nf_hook(void)
-- 
1.9.1




[PATCH net-next 1/1] driver: ipvlan: Add the sanity check for ipvlan mode

2016-11-28 Thread fgao
From: Gao Feng 

The ipvlan mode variable "nval" is from userspace, so the ipvlan codes
should check if the mode variable "nval" is valid.

Signed-off-by: Gao Feng 
---
 drivers/net/ipvlan/ipvlan_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index ab90b22..537b5a9 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -65,6 +65,9 @@ static int ipvlan_set_port_mode(struct ipvl_port *port, u16 
nval)
struct net_device *mdev = port->dev;
int err = 0;
 
+   if (nval >= IPVLAN_MODE_MAX)
+   return -EINVAL;
+
ASSERT_RTNL();
if (port->mode != nval) {
if (nval == IPVLAN_MODE_L3S) {
-- 
1.9.1




[PATCH net-next v2 1/1] driver: ipvlan: Use NF_IP_PRI_LAST as hook priority instead of INT_MAX

2016-11-27 Thread fgao
From: Gao Feng 

It is better to use NF_IP_PRI_LAST instead of INT_MAX as hook priority.
The former is good at readability and easier to maintain.

Signed-off-by: Gao Feng 
---
 v2: Add the lost header file. It is added in local but not in v1 patch
 v1: Initial patch

 drivers/net/ipvlan/ipvlan_main.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index ab90b22..01c7446 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -7,6 +7,7 @@
  *
  */
 
+#include "linux/netfilter_ipv4.h"
 #include "ipvlan.h"
 
 static u32 ipvl_nf_hook_refcnt = 0;
@@ -16,13 +17,13 @@
.hook = ipvlan_nf_input,
.pf   = NFPROTO_IPV4,
.hooknum  = NF_INET_LOCAL_IN,
-   .priority = INT_MAX,
+   .priority = NF_IP_PRI_LAST,
},
{
.hook = ipvlan_nf_input,
.pf   = NFPROTO_IPV6,
.hooknum  = NF_INET_LOCAL_IN,
-   .priority = INT_MAX,
+   .priority = NF_IP_PRI_LAST,
},
 };
 
-- 
1.9.1




[PATCH net-next 1/1] driver: ipvlan: Use NF_IP_PRI_LAST as hook priority instead of INT_MAX

2016-11-27 Thread fgao
From: Gao Feng 

It is better to use NF_IP_PRI_LAST instead of INT_MAX as hook priority.
The former is good at readability and easier to maintain.

Signed-off-by: Gao Feng 
---
 drivers/net/ipvlan/ipvlan_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index ab90b22..d70d245 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -16,13 +16,13 @@
.hook = ipvlan_nf_input,
.pf   = NFPROTO_IPV4,
.hooknum  = NF_INET_LOCAL_IN,
-   .priority = INT_MAX,
+   .priority = NF_IP_PRI_LAST,
},
{
.hook = ipvlan_nf_input,
.pf   = NFPROTO_IPV6,
.hooknum  = NF_INET_LOCAL_IN,
-   .priority = INT_MAX,
+   .priority = NF_IP_PRI_LAST,
},
 };
 
-- 
1.9.1




[PATCH net 1/1] driver: macvtap: Unregister netdev rx_handler if macvtap_newlink fails

2016-11-24 Thread fgao
From: Gao Feng 

The macvtap_newlink registers the netdev rx_handler firstly, but it
does not unregister the handler if macvlan_common_newlink failed.

Signed-off-by: Gao Feng 
---
 drivers/net/macvtap.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 070e329..bceca28 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -491,7 +491,13 @@ static int macvtap_newlink(struct net *src_net,
/* Don't put anything that may fail after macvlan_common_newlink
 * because we can't undo what it does.
 */
-   return macvlan_common_newlink(src_net, dev, tb, data);
+   err = macvlan_common_newlink(src_net, dev, tb, data);
+   if (err) {
+   netdev_rx_handler_unregister(dev);
+   return err;
+   }
+
+   return 0;
 }
 
 static void macvtap_dellink(struct net_device *dev,
-- 
1.9.1




[PATCH net 1/1] driver: ipvlan: Fix one possible memleak in ipvlan_link_new

2016-11-24 Thread fgao
From: Gao Feng 

When ipvlan_link_new creates one ipvlan port and then fails, it does not
destroy the ipvlan port it created. This causes a mem leak and leaves the
physical device holding invalid ipvlan data.

Signed-off-by: Gao Feng 
---
 drivers/net/ipvlan/ipvlan_main.c | 17 -
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index f442eb3..0fef178 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -497,6 +497,7 @@ static int ipvlan_link_new(struct net *src_net, struct 
net_device *dev,
struct net_device *phy_dev;
int err;
u16 mode = IPVLAN_MODE_L3;
+   bool create = false;
 
if (!tb[IFLA_LINK])
return -EINVAL;
@@ -513,6 +514,7 @@ static int ipvlan_link_new(struct net *src_net, struct 
net_device *dev,
err = ipvlan_port_create(phy_dev);
if (err < 0)
return err;
+   create = true;
}
 
if (data && data[IFLA_IPVLAN_MODE])
@@ -536,22 +538,27 @@ static int ipvlan_link_new(struct net *src_net, struct 
net_device *dev,
 
err = register_netdevice(dev);
if (err < 0)
-   return err;
+   goto destroy_ipvlan_port;
 
err = netdev_upper_dev_link(phy_dev, dev);
if (err) {
-   unregister_netdevice(dev);
-   return err;
+   goto unregister_netdev;
}
err = ipvlan_set_port_mode(port, mode);
if (err) {
-   unregister_netdevice(dev);
-   return err;
+   goto unregister_netdev;
}
 
list_add_tail_rcu(>pnode, >ipvlans);
netif_stacked_transfer_operstate(phy_dev, dev);
return 0;
+
+unregister_netdev:
+   unregister_netdevice(dev);
+destroy_ipvlan_port:
+   if (create)
+   ipvlan_port_destroy(phy_dev);
+   return err;
 }
 
 static void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
-- 
1.9.1




[PATCH net 1/1] driver: macvlan: Check if need rollback multicast setting in macvlan_open

2016-11-21 Thread fgao
From: Gao Feng 

When dev_set_promiscuity failed in macvlan_open, it always invokes
dev_set_allmulti without checking if necessary.
Now check the IFF_ALLMULTI flag firstly before rollback the multicast
setting in the error handler.

Signed-off-by: Gao Feng 
---
 drivers/net/macvlan.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index d2d6f12..26d6f0b 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -623,7 +623,8 @@ static int macvlan_open(struct net_device *dev)
return 0;
 
 clear_multi:
-   dev_set_allmulti(lowerdev, -1);
+   if (dev->flags & IFF_ALLMULTI)
+   dev_set_allmulti(lowerdev, -1);
 del_unicast:
dev_uc_del(lowerdev, dev->dev_addr);
 out:
-- 
1.9.1




[PATCH net-next 1/2] net: batman-adv: Treat NET_XMIT_CN as transmit successfully

2016-11-21 Thread fgao
From: Gao Feng 

The tc could return NET_XMIT_CN as one congestion notification, but
it does not mean the packet is lost. Other modules like ipvlan,
macvlan, and others treat NET_XMIT_CN as success too.

So batman-adv should add the NET_XMIT_CN check.

Signed-off-by: Gao Feng 
---
 net/batman-adv/distributed-arp-table.c |  2 +-
 net/batman-adv/fragmentation.c |  2 +-
 net/batman-adv/routing.c   | 10 +-
 net/batman-adv/soft-interface.c|  2 +-
 net/batman-adv/tp_meter.c  |  2 +-
 5 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/net/batman-adv/distributed-arp-table.c 
b/net/batman-adv/distributed-arp-table.c
index 49576c5..f6ff4de 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -659,7 +659,7 @@ static bool batadv_dat_send_data(struct batadv_priv 
*bat_priv,
}
 
send_status = batadv_send_unicast_skb(tmp_skb, neigh_node);
-   if (send_status == NET_XMIT_SUCCESS) {
+   if (send_status == NET_XMIT_SUCCESS || send_status == 
NET_XMIT_CN) {
/* count the sent packet */
switch (packet_subtype) {
case BATADV_P_DAT_DHT_GET:
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 9c561e6..5239616 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -509,7 +509,7 @@ int batadv_frag_send_packet(struct sk_buff *skb,
batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES,
   skb_fragment->len + ETH_HLEN);
ret = batadv_send_unicast_skb(skb_fragment, neigh_node);
-   if (ret != NET_XMIT_SUCCESS) {
+   if (ret != NET_XMIT_SUCCESS && ret != NET_XMIT_CN) {
ret = NET_XMIT_DROP;
goto free_skb;
}
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 6713bdf..6b08b26 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -262,7 +262,7 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv 
*bat_priv,
icmph->ttl = BATADV_TTL;
 
res = batadv_send_skb_to_orig(skb, orig_node, NULL);
-   if (res == NET_XMIT_SUCCESS)
+   if (res == NET_XMIT_SUCCESS || res == NET_XMIT_CN)
ret = NET_RX_SUCCESS;
 
/* skb was consumed */
@@ -330,7 +330,7 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv 
*bat_priv,
icmp_packet->ttl = BATADV_TTL;
 
res = batadv_send_skb_to_orig(skb, orig_node, NULL);
-   if (res == NET_RX_SUCCESS)
+   if (res == NET_RX_SUCCESS || res == NET_XMIT_CN)
ret = NET_XMIT_SUCCESS;
 
/* skb was consumed */
@@ -424,7 +424,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
 
/* route it */
res = batadv_send_skb_to_orig(skb, orig_node, recv_if);
-   if (res == NET_XMIT_SUCCESS)
+   if (res == NET_XMIT_SUCCESS || res == NET_XMIT_CN)
ret = NET_RX_SUCCESS;
 
/* skb was consumed */
@@ -719,14 +719,14 @@ static int batadv_route_unicast_packet(struct sk_buff 
*skb,
 
len = skb->len;
res = batadv_send_skb_to_orig(skb, orig_node, recv_if);
-   if (res == NET_XMIT_SUCCESS)
+   if (res == NET_XMIT_SUCCESS || res == NET_XMIT_CN)
ret = NET_RX_SUCCESS;
 
/* skb was consumed */
skb = NULL;
 
/* translate transmit result into receive result */
-   if (res == NET_XMIT_SUCCESS) {
+   if (res == NET_XMIT_SUCCESS || res == NET_XMIT_CN) {
/* skb was transmitted and consumed */
batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD);
batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES,
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 7b3494a..60516bb 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -386,7 +386,7 @@ static int batadv_interface_tx(struct sk_buff *skb,
ret = batadv_send_skb_via_tt(bat_priv, skb, dst_hint,
 vid);
}
-   if (ret != NET_XMIT_SUCCESS)
+   if (ret != NET_XMIT_SUCCESS && ret != NET_XMIT_CN)
goto dropped_freed;
}
 
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index f156452..44bfb1e 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -615,7 +615,7 @@ static int batadv_tp_send_msg(struct batadv_tp_vars 
*tp_vars, const u8 *src,
batadv_tp_fill_prerandom(tp_vars, data, data_len);
 
r = batadv_send_skb_to_orig(skb, orig_node, NULL);
-   if (r == NET_XMIT_SUCCESS)
+   if (r == NET_XMIT_SUCCESS || r == NET_XMIT_CN)

[PATCH net-next 2/2] net: batman-adv: Remove one condition check in batadv_route_unicast_packet

2016-11-21 Thread fgao
From: Gao Feng 

The same condition is tested twice in a row. Merge the statements into a
single conditional block to save one condition check.

Signed-off-by: Gao Feng 
---
 net/batman-adv/routing.c | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 6b08b26..9d657cf 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -719,20 +719,18 @@ static int batadv_route_unicast_packet(struct sk_buff 
*skb,
 
len = skb->len;
res = batadv_send_skb_to_orig(skb, orig_node, recv_if);
-   if (res == NET_XMIT_SUCCESS || res == NET_XMIT_CN)
-   ret = NET_RX_SUCCESS;
-
-   /* skb was consumed */
-   skb = NULL;
-
/* translate transmit result into receive result */
if (res == NET_XMIT_SUCCESS || res == NET_XMIT_CN) {
+   ret = NET_RX_SUCCESS;
/* skb was transmitted and consumed */
batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD);
batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES,
   len + ETH_HLEN);
}
 
+   /* skb was consumed */
+   skb = NULL;
+
 put_orig_node:
batadv_orig_node_put(orig_node);
 free_skb:
-- 
1.9.1




[PATCH net-next 1/2] net: batman-adv: Treat NET_XMIT_CN as transmit successfully

2016-11-21 Thread fgao
From: Gao Feng 

The tc could return NET_XMIT_CN as one congestion notification, but
it does not mean the packet is lost. Other modules like ipvlan,
macvlan, and others treat NET_XMIT_CN as success too.

So batman-adv should add the NET_XMIT_CN check.

Signed-off-by: Gao Feng 
---
 net/batman-adv/distributed-arp-table.c |  2 +-
 net/batman-adv/fragmentation.c |  2 +-
 net/batman-adv/routing.c   | 10 +-
 net/batman-adv/soft-interface.c|  2 +-
 net/batman-adv/tp_meter.c  |  2 +-
 5 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/net/batman-adv/distributed-arp-table.c 
b/net/batman-adv/distributed-arp-table.c
index 49576c5..f6ff4de 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -659,7 +659,7 @@ static bool batadv_dat_send_data(struct batadv_priv 
*bat_priv,
}
 
send_status = batadv_send_unicast_skb(tmp_skb, neigh_node);
-   if (send_status == NET_XMIT_SUCCESS) {
+   if (send_status == NET_XMIT_SUCCESS || send_status == 
NET_XMIT_CN) {
/* count the sent packet */
switch (packet_subtype) {
case BATADV_P_DAT_DHT_GET:
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 9c561e6..5239616 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -509,7 +509,7 @@ int batadv_frag_send_packet(struct sk_buff *skb,
batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES,
   skb_fragment->len + ETH_HLEN);
ret = batadv_send_unicast_skb(skb_fragment, neigh_node);
-   if (ret != NET_XMIT_SUCCESS) {
+   if (ret != NET_XMIT_SUCCESS && ret != NET_XMIT_CN) {
ret = NET_XMIT_DROP;
goto free_skb;
}
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 6713bdf..6b08b26 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -262,7 +262,7 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv 
*bat_priv,
icmph->ttl = BATADV_TTL;
 
res = batadv_send_skb_to_orig(skb, orig_node, NULL);
-   if (res == NET_XMIT_SUCCESS)
+   if (res == NET_XMIT_SUCCESS || res == NET_XMIT_CN)
ret = NET_RX_SUCCESS;
 
/* skb was consumed */
@@ -330,7 +330,7 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv 
*bat_priv,
icmp_packet->ttl = BATADV_TTL;
 
res = batadv_send_skb_to_orig(skb, orig_node, NULL);
-   if (res == NET_RX_SUCCESS)
+   if (res == NET_RX_SUCCESS || res == NET_XMIT_CN)
ret = NET_XMIT_SUCCESS;
 
/* skb was consumed */
@@ -424,7 +424,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
 
/* route it */
res = batadv_send_skb_to_orig(skb, orig_node, recv_if);
-   if (res == NET_XMIT_SUCCESS)
+   if (res == NET_XMIT_SUCCESS || res == NET_XMIT_CN)
ret = NET_RX_SUCCESS;
 
/* skb was consumed */
@@ -719,14 +719,14 @@ static int batadv_route_unicast_packet(struct sk_buff 
*skb,
 
len = skb->len;
res = batadv_send_skb_to_orig(skb, orig_node, recv_if);
-   if (res == NET_XMIT_SUCCESS)
+   if (res == NET_XMIT_SUCCESS || res == NET_XMIT_CN)
ret = NET_RX_SUCCESS;
 
/* skb was consumed */
skb = NULL;
 
/* translate transmit result into receive result */
-   if (res == NET_XMIT_SUCCESS) {
+   if (res == NET_XMIT_SUCCESS || res == NET_XMIT_CN) {
/* skb was transmitted and consumed */
batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD);
batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES,
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 7b3494a..60516bb 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -386,7 +386,7 @@ static int batadv_interface_tx(struct sk_buff *skb,
ret = batadv_send_skb_via_tt(bat_priv, skb, dst_hint,
 vid);
}
-   if (ret != NET_XMIT_SUCCESS)
+   if (ret != NET_XMIT_SUCCESS && ret != NET_XMIT_CN)
goto dropped_freed;
}
 
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index f156452..44bfb1e 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -615,7 +615,7 @@ static int batadv_tp_send_msg(struct batadv_tp_vars 
*tp_vars, const u8 *src,
batadv_tp_fill_prerandom(tp_vars, data, data_len);
 
r = batadv_send_skb_to_orig(skb, orig_node, NULL);
-   if (r == NET_XMIT_SUCCESS)
+   if (r == NET_XMIT_SUCCESS || r == NET_XMIT_CN)

[PATCH net v2 1/1] net: batman-adv: Treat NET_XMIT_CN as transmit successfully

2016-11-21 Thread fgao
From: Gao Feng 

The tc could return NET_XMIT_CN as one congestion notification, but
it does not mean the packet is lost. Other modules like ipvlan,
macvlan, and others treat NET_XMIT_CN as success too.

So batman-adv should add the NET_XMIT_CN check.

Signed-off-by: Gao Feng 
---
 v2: Correct two typo "packe" and "ret"
 v1: Initial version

 net/batman-adv/distributed-arp-table.c | 2 +-
 net/batman-adv/fragmentation.c | 2 +-
 net/batman-adv/routing.c   | 2 +-
 net/batman-adv/tp_meter.c  | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/batman-adv/distributed-arp-table.c 
b/net/batman-adv/distributed-arp-table.c
index e257efd..4bf0622 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -660,7 +660,7 @@ static bool batadv_dat_send_data(struct batadv_priv 
*bat_priv,
}
 
send_status = batadv_send_unicast_skb(tmp_skb, neigh_node);
-   if (send_status == NET_XMIT_SUCCESS) {
+   if (send_status == NET_XMIT_SUCCESS || send_status == 
NET_XMIT_CN) {
/* count the sent packet */
switch (packet_subtype) {
case BATADV_P_DAT_DHT_GET:
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 0934730..4714b8f 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -495,7 +495,7 @@ int batadv_frag_send_packet(struct sk_buff *skb,
batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES,
   skb_fragment->len + ETH_HLEN);
ret = batadv_send_unicast_skb(skb_fragment, neigh_node);
-   if (ret != NET_XMIT_SUCCESS) {
+   if (ret != NET_XMIT_SUCCESS && ret != NET_XMIT_CN) {
/* return -1 so that the caller can free the original
 * skb
 */
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 7e8dc64..f44fb07 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -706,7 +706,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
goto out;
 
/* translate transmit result into receive result */
-   if (res == NET_XMIT_SUCCESS) {
+   if (res == NET_XMIT_SUCCESS || res == NET_XMIT_CN) {
/* skb was transmitted and consumed */
batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD);
batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES,
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 8af1611..461dbad 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -618,7 +618,7 @@ static int batadv_tp_send_msg(struct batadv_tp_vars 
*tp_vars, const u8 *src,
if (r == -1)
kfree_skb(skb);
 
-   if (r == NET_XMIT_SUCCESS)
+   if (r == NET_XMIT_SUCCESS || r == NET_XMIT_CN)
return 0;
 
return BATADV_TP_REASON_CANT_SEND;
-- 
1.9.1




[PATCH net 1/1] net: l2tp: Treat NET_XMIT_CN as success in l2tp_eth_dev_xmit

2016-11-20 Thread fgao
From: Gao Feng 

The tc layer may return NET_XMIT_CN as a congestion notification, but
that does not mean the packet is lost. Other modules such as ipvlan and
macvlan treat NET_XMIT_CN as success too.
So l2tp_eth_dev_xmit should add the NET_XMIT_CN check.

Signed-off-by: Gao Feng 
---
 net/l2tp/l2tp_eth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 965f7e3..3dc97b4 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -97,7 +97,7 @@ static int l2tp_eth_dev_xmit(struct sk_buff *skb, struct 
net_device *dev)
unsigned int len = skb->len;
int ret = l2tp_xmit_skb(session, skb, session->hdr_len);
 
-   if (likely(ret == NET_XMIT_SUCCESS)) {
+   if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
atomic_long_add(len, &priv->tx_bytes);
atomic_long_inc(&priv->tx_packets);
} else {
-- 
1.9.1




[PATCH net 1/1] net: batman-adv: Treat NET_XMIT_CN as transmit successfully

2016-11-20 Thread fgao
From: Gao Feng 

The tc could return NET_XMIT_CN as one congestion notification, but
it does not mean the packe is lost. Other modules like ipvlan,
macvlan, and others treat NET_XMIT_CN as success too.

So batman-adv should add the NET_XMIT_CN check.

Signed-off-by: Gao Feng 
---
 net/batman-adv/distributed-arp-table.c | 2 +-
 net/batman-adv/fragmentation.c | 2 +-
 net/batman-adv/routing.c   | 2 +-
 net/batman-adv/tp_meter.c  | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/batman-adv/distributed-arp-table.c 
b/net/batman-adv/distributed-arp-table.c
index e257efd..4bf0622 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -660,7 +660,7 @@ static bool batadv_dat_send_data(struct batadv_priv 
*bat_priv,
}
 
send_status = batadv_send_unicast_skb(tmp_skb, neigh_node);
-   if (send_status == NET_XMIT_SUCCESS) {
+   if (send_status == NET_XMIT_SUCCESS || send_status == 
NET_XMIT_CN) {
/* count the sent packet */
switch (packet_subtype) {
case BATADV_P_DAT_DHT_GET:
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 0934730..4714b8f 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -495,7 +495,7 @@ int batadv_frag_send_packet(struct sk_buff *skb,
batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES,
   skb_fragment->len + ETH_HLEN);
ret = batadv_send_unicast_skb(skb_fragment, neigh_node);
-   if (ret != NET_XMIT_SUCCESS) {
+   if (ret != NET_XMIT_SUCCESS && ret != NET_XMIT_CN) {
/* return -1 so that the caller can free the original
 * skb
 */
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 7e8dc64..8edd324 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -706,7 +706,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
goto out;
 
/* translate transmit result into receive result */
-   if (res == NET_XMIT_SUCCESS) {
+   if (res == NET_XMIT_SUCCESS || ret == NET_XMIT_CN) {
/* skb was transmitted and consumed */
batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD);
batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES,
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 8af1611..461dbad 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -618,7 +618,7 @@ static int batadv_tp_send_msg(struct batadv_tp_vars 
*tp_vars, const u8 *src,
if (r == -1)
kfree_skb(skb);
 
-   if (r == NET_XMIT_SUCCESS)
+   if (r == NET_XMIT_SUCCESS || r == NET_XMIT_CN)
return 0;
 
return BATADV_TP_REASON_CANT_SEND;
-- 
1.9.1




[PATCH net-next 1/1] driver: macvlan: Remove duplicated IFF_UP condition check in macvlan_forward_source

2016-11-20 Thread fgao
From: Gao Feng 

The function macvlan_forward_source_one already checks the IFF_UP flag,
so there is no need to check it again in its caller macvlan_forward_source.

Signed-off-by: Gao Feng 
---
 v2: Remove the IFF_UP check in macvlan_forward_source instead of 
macvlan_forward_source_one
 v1: Initial patch

 drivers/net/macvlan.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 13b7e0b..7ddfd2c 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -400,8 +400,7 @@ static void macvlan_forward_source(struct sk_buff *skb,
 
hlist_for_each_entry_rcu(entry, h, hlist) {
if (ether_addr_equal_64bits(entry->addr, addr))
-   if (entry->vlan->dev->flags & IFF_UP)
-   macvlan_forward_source_one(skb, entry->vlan);
+   macvlan_forward_source_one(skb, entry->vlan);
}
 }
 
-- 
1.9.1




[PATCH net-next 1/1] driver: macvlan: Remove duplicated IFF_UP condition check in macvlan_forward_source_one

2016-11-20 Thread fgao
From: Gao Feng 

The condition check "dev->flags & IFF_UP" is duplicated in
macvlan_forward_source_one, because its caller macvlan_forward_source
has already checked this flag.

Signed-off-by: Gao Feng 
---
 drivers/net/macvlan.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 13b7e0b..95a5ffc 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -375,9 +375,6 @@ static void macvlan_forward_source_one(struct sk_buff *skb,
int ret;
 
dev = vlan->dev;
-   if (unlikely(!(dev->flags & IFF_UP)))
-   return;
-
nskb = skb_clone(skb, GFP_ATOMIC);
if (!nskb)
return;
-- 
1.9.1




[PATCH net-next 1/1] driver: macvlan: Replace integer number with bool value

2016-11-13 Thread fgao
From: Gao Feng 

The return value of function macvlan_addr_busy is used as bool value,
so use bool value instead of integer number "1" and "0".

Signed-off-by: Gao Feng 
---
 drivers/net/macvlan.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index a064415..d0361f3 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -179,20 +179,20 @@ static void macvlan_hash_change_addr(struct macvlan_dev 
*vlan,
macvlan_hash_add(vlan);
 }
 
-static int macvlan_addr_busy(const struct macvlan_port *port,
-   const unsigned char *addr)
+static bool macvlan_addr_busy(const struct macvlan_port *port,
+ const unsigned char *addr)
 {
/* Test to see if the specified multicast address is
 * currently in use by the underlying device or
 * another macvlan.
 */
if (ether_addr_equal_64bits(port->dev->dev_addr, addr))
-   return 1;
+   return true;
 
if (macvlan_hash_lookup(port, addr))
-   return 1;
+   return true;
 
-   return 0;
+   return false;
 }
 
 
-- 
1.9.1




[PATCH net 1/1] driver: macvlan: Destroy new macvlan port if macvlan_common_newlink failed.

2016-11-03 Thread fgao
From: Gao Feng 

When there is no existing macvlan port on lowerdev, a new macvlan port
is created. But it is not destroyed when something fails later.
This causes a memory leak.

Now add one flag to indicate if new macvlan port is created.

Signed-off-by: Gao Feng 
---
 drivers/net/macvlan.c | 31 ++-
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 3234fcd..d2d6f12 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -1278,6 +1278,7 @@ int macvlan_common_newlink(struct net *src_net, struct 
net_device *dev,
struct net_device *lowerdev;
int err;
int macmode;
+   bool create = false;
 
if (!tb[IFLA_LINK])
return -EINVAL;
@@ -1304,12 +1305,18 @@ int macvlan_common_newlink(struct net *src_net, struct 
net_device *dev,
err = macvlan_port_create(lowerdev);
if (err < 0)
return err;
+   create = true;
}
port = macvlan_port_get_rtnl(lowerdev);
 
/* Only 1 macvlan device can be created in passthru mode */
-   if (port->passthru)
-   return -EINVAL;
+   if (port->passthru) {
+   /* The macvlan port must be not created this time,
+* still goto destroy_macvlan_port for readability.
+*/
+   err = -EINVAL;
+   goto destroy_macvlan_port;
+   }
 
vlan->lowerdev = lowerdev;
vlan->dev  = dev;
@@ -1325,24 +1332,28 @@ int macvlan_common_newlink(struct net *src_net, struct 
net_device *dev,
vlan->flags = nla_get_u16(data[IFLA_MACVLAN_FLAGS]);
 
if (vlan->mode == MACVLAN_MODE_PASSTHRU) {
-   if (port->count)
-   return -EINVAL;
+   if (port->count) {
+   err = -EINVAL;
+   goto destroy_macvlan_port;
+   }
port->passthru = true;
eth_hw_addr_inherit(dev, lowerdev);
}
 
if (data && data[IFLA_MACVLAN_MACADDR_MODE]) {
-   if (vlan->mode != MACVLAN_MODE_SOURCE)
-   return -EINVAL;
+   if (vlan->mode != MACVLAN_MODE_SOURCE) {
+   err = -EINVAL;
+   goto destroy_macvlan_port;
+   }
macmode = nla_get_u32(data[IFLA_MACVLAN_MACADDR_MODE]);
err = macvlan_changelink_sources(vlan, macmode, data);
if (err)
-   return err;
+   goto destroy_macvlan_port;
}
 
err = register_netdevice(dev);
if (err < 0)
-   return err;
+   goto destroy_macvlan_port;
 
dev->priv_flags |= IFF_MACVLAN;
err = netdev_upper_dev_link(lowerdev, dev);
@@ -1357,7 +1368,9 @@ int macvlan_common_newlink(struct net *src_net, struct 
net_device *dev,
 
 unregister_netdev:
unregister_netdevice(dev);
-
+destroy_macvlan_port:
+   if (create)
+   macvlan_port_destroy(port->dev);
return err;
 }
 EXPORT_SYMBOL_GPL(macvlan_common_newlink);
-- 
1.9.1




[PATCH net-next v2 1/1] driver: veth: Refine the statistics codes of veth driver

2016-11-03 Thread fgao
From: Gao Feng 

The dropped count of veth is located in struct veth_priv, but other
statistics like packets and bytes are in another struct pcpu_vstats.
Now keep these three counters in the same struct.

Signed-off-by: Gao Feng 
---
 v2: Use right "peer" instead of "dev";
 v1: Initial version

 drivers/net/veth.c | 32 ++--
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 0520952a..0d669b4 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -26,12 +26,12 @@
 struct pcpu_vstats {
u64 packets;
u64 bytes;
+   u64 dropped;
struct u64_stats_sync   syncp;
 };
 
 struct veth_priv {
struct net_device __rcu *peer;
-   atomic64_t  dropped;
unsigned requested_headroom;
 };
 
@@ -108,6 +108,8 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct 
net_device *dev)
struct veth_priv *priv = netdev_priv(dev);
struct net_device *rcv;
int length = skb->len;
+   struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats);
+   int ret = NET_RX_DROP;
 
rcu_read_lock();
rcv = rcu_dereference(priv->peer);
@@ -116,17 +118,16 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct 
net_device *dev)
goto drop;
}
 
-   if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) {
-   struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats);
-
-   u64_stats_update_begin(&stats->syncp);
+   ret = dev_forward_skb(rcv, skb);
+drop:
+   u64_stats_update_begin(&stats->syncp);
+   if (likely(ret == NET_RX_SUCCESS)) {
stats->bytes += length;
stats->packets++;
-   u64_stats_update_end(&stats->syncp);
} else {
-drop:
-   atomic64_inc(&priv->dropped);
+   stats->dropped++;
}
+   u64_stats_update_end(&stats->syncp);
rcu_read_unlock();
return NETDEV_TX_OK;
 }
@@ -135,27 +136,28 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct 
net_device *dev)
  * general routines
  */
 
-static u64 veth_stats_one(struct pcpu_vstats *result, struct net_device *dev)
+static void veth_stats_one(struct pcpu_vstats *result, struct net_device *dev)
 {
-   struct veth_priv *priv = netdev_priv(dev);
int cpu;
 
result->packets = 0;
result->bytes = 0;
+   result->dropped = 0;
for_each_possible_cpu(cpu) {
struct pcpu_vstats *stats = per_cpu_ptr(dev->vstats, cpu);
-   u64 packets, bytes;
+   u64 packets, bytes, dropped;
unsigned int start;
 
do {
start = u64_stats_fetch_begin_irq(&stats->syncp);
packets = stats->packets;
bytes = stats->bytes;
+   dropped = stats->dropped;
} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
result->packets += packets;
result->bytes += bytes;
+   result->dropped += dropped;
}
-   return atomic64_read(&priv->dropped);
 }
 
 static struct rtnl_link_stats64 *veth_get_stats64(struct net_device *dev,
@@ -165,16 +167,18 @@ static struct rtnl_link_stats64 *veth_get_stats64(struct 
net_device *dev,
struct net_device *peer;
struct pcpu_vstats one;
 
-   tot->tx_dropped = veth_stats_one(&one, dev);
+   veth_stats_one(&one, dev);
tot->tx_bytes = one.bytes;
tot->tx_packets = one.packets;
+   tot->tx_dropped = one.dropped;
 
rcu_read_lock();
peer = rcu_dereference(priv->peer);
if (peer) {
-   tot->rx_dropped = veth_stats_one(&one, peer);
+   veth_stats_one(&one, peer);
tot->rx_bytes = one.bytes;
tot->rx_packets = one.packets;
+   tot->rx_dropped = one.dropped;
}
rcu_read_unlock();
 
-- 
1.9.1




[PATCH net-next 1/1] driver: veth: Refine the statistics codes of veth driver

2016-11-03 Thread fgao
From: Gao Feng 

The dropped count of veth is located in struct veth_priv, but other
statistics like packets and bytes are in another struct pcpu_vstats.
Now keep these three counters in the same struct.

Signed-off-by: Gao Feng 
---
 drivers/net/veth.c | 32 ++--
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 0520952a..3d8326f 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -26,12 +26,12 @@
 struct pcpu_vstats {
u64 packets;
u64 bytes;
+   u64 dropped;
struct u64_stats_sync   syncp;
 };
 
 struct veth_priv {
struct net_device __rcu *peer;
-   atomic64_t  dropped;
unsigned requested_headroom;
 };
 
@@ -108,6 +108,8 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct 
net_device *dev)
struct veth_priv *priv = netdev_priv(dev);
struct net_device *rcv;
int length = skb->len;
+   struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats);
+   int ret = NET_RX_DROP;
 
rcu_read_lock();
rcv = rcu_dereference(priv->peer);
@@ -116,17 +118,16 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct 
net_device *dev)
goto drop;
}
 
-   if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) {
-   struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats);
-
-   u64_stats_update_begin(&stats->syncp);
+   ret = dev_forward_skb(rcv, skb);
+drop:
+   u64_stats_update_begin(&stats->syncp);
+   if (likely(ret == NET_RX_SUCCESS)) {
stats->bytes += length;
stats->packets++;
-   u64_stats_update_end(&stats->syncp);
} else {
-drop:
-   atomic64_inc(&priv->dropped);
+   stats->dropped++;
}
+   u64_stats_update_end(&stats->syncp);
rcu_read_unlock();
return NETDEV_TX_OK;
 }
@@ -135,27 +136,28 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct 
net_device *dev)
  * general routines
  */
 
-static u64 veth_stats_one(struct pcpu_vstats *result, struct net_device *dev)
+static void veth_stats_one(struct pcpu_vstats *result, struct net_device *dev)
 {
-   struct veth_priv *priv = netdev_priv(dev);
int cpu;
 
result->packets = 0;
result->bytes = 0;
+   result->dropped = 0;
for_each_possible_cpu(cpu) {
struct pcpu_vstats *stats = per_cpu_ptr(dev->vstats, cpu);
-   u64 packets, bytes;
+   u64 packets, bytes, dropped;
unsigned int start;
 
do {
start = u64_stats_fetch_begin_irq(&stats->syncp);
packets = stats->packets;
bytes = stats->bytes;
+   dropped = stats->dropped;
} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
result->packets += packets;
result->bytes += bytes;
+   result->dropped += dropped;
}
-   return atomic64_read(&priv->dropped);
 }
 
 static struct rtnl_link_stats64 *veth_get_stats64(struct net_device *dev,
@@ -165,16 +167,18 @@ static struct rtnl_link_stats64 *veth_get_stats64(struct 
net_device *dev,
struct net_device *peer;
struct pcpu_vstats one;
 
-   tot->tx_dropped = veth_stats_one(&one, dev);
+   veth_stats_one(&one, dev);
tot->tx_bytes = one.bytes;
tot->tx_packets = one.packets;
+   tot->tx_dropped = one.dropped;
 
rcu_read_lock();
peer = rcu_dereference(priv->peer);
if (peer) {
-   tot->rx_dropped = veth_stats_one(&one, peer);
+   veth_stats_one(&one, dev);
tot->rx_bytes = one.bytes;
tot->rx_packets = one.packets;
+   tot->rx_dropped = one.dropped;
}
rcu_read_unlock();
 
-- 
1.9.1




[PATCH net 1/1] driver: veth: Return the actual value instead return NETDEV_TX_OK always

2016-11-02 Thread fgao
From: Gao Feng 

Currently veth_xmit always returns NETDEV_TX_OK, regardless of whether
the packet was really sent successfully. Now return the actual result
instead of always returning NETDEV_TX_OK.

Signed-off-by: Gao Feng 
---
 drivers/net/veth.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index fbc853e..769a3bd 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -111,15 +111,18 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct 
net_device *dev)
struct veth_priv *priv = netdev_priv(dev);
struct net_device *rcv;
int length = skb->len;
+   int ret = NETDEV_TX_OK;
 
rcu_read_lock();
rcv = rcu_dereference(priv->peer);
if (unlikely(!rcv)) {
kfree_skb(skb);
+   ret = NET_RX_DROP;
goto drop;
}
 
-   if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) {
+   ret = dev_forward_skb(rcv, skb);
+   if (likely(ret == NET_RX_SUCCESS)) {
struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats);
 
u64_stats_update_begin(&stats->syncp);
@@ -131,7 +134,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct 
net_device *dev)
atomic64_inc(&priv->dropped);
}
rcu_read_unlock();
-   return NETDEV_TX_OK;
+   return ret;
 }
 
 /*
-- 
1.9.1




[PATCH net-next v2 1/1] driver: tun: Use new macro SOCK_IOC_TYPE instead of literal number 0x89

2016-10-26 Thread fgao
From: Gao Feng 

The current code uses _IOC_TYPE(cmd) == 0x89 to check whether cmd is a
socket ioctl command such as SIOCGIFHWADDR. But the literal number 0x89 may
confuse readers. So create a macro SOCK_IOC_TYPE to improve readability.

Signed-off-by: Gao Feng 
---
 v2: Rename the macro SOCK_IOC_MAGIC to SOCK_IOC_TYPE
 v1: Initial version

 drivers/net/tun.c| 2 +-
 include/uapi/linux/sockios.h | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 9328568..bb49ccb 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1985,7 +1985,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned 
int cmd,
int le;
int ret;
 
-   if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == 0x89) {
+   if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == 
SOCK_IOC_TYPE) {
if (copy_from_user(&ifr, argp, ifreq_len))
return -EFAULT;
} else {
diff --git a/include/uapi/linux/sockios.h b/include/uapi/linux/sockios.h
index 8e7890b..c977d0e 100644
--- a/include/uapi/linux/sockios.h
+++ b/include/uapi/linux/sockios.h
@@ -24,6 +24,8 @@
 #define SIOCINQ    FIONREAD
 #define SIOCOUTQ   TIOCOUTQ/* output queue size (not sent + not 
acked) */
 
+#define SOCK_IOC_TYPE  0x89
+
 /* Routing table calls. */
 #define SIOCADDRT  0x890B  /* add routing table entry  */
 #define SIOCDELRT  0x890C  /* delete routing table entry   */
-- 
1.9.1




[PATCH net-next 1/1] driver: tun: Move tun check into the block of TUNSETIFF condition check

2016-10-25 Thread fgao
From: Gao Feng 

When cmd is TUNSETIFF and tun is not NULL, the original code goes ahead,
then reaches the default case of switch (cmd) and sets ret to -EINVAL.
This is not clear to readers.

Now move the tun check into the block of TUNSETIFF condition check, and
return -EEXIST instead of -EINVAL when the tfile already owns one tun.

Signed-off-by: Gao Feng 
---
 drivers/net/tun.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 9328568..9142db8 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2005,7 +2005,11 @@ static long __tun_chr_ioctl(struct file *file, unsigned 
int cmd,
rtnl_lock();
 
tun = __tun_get(tfile);
-   if (cmd == TUNSETIFF && !tun) {
+   if (cmd == TUNSETIFF) {
+   ret = -EEXIST;
+   if (tun)
+   goto unlock;
+
ifr.ifr_name[IFNAMSIZ-1] = '\0';
 
ret = tun_set_iff(sock_net(>sk), file, );
-- 
1.9.1




[PATCH net-next 1/1] driver: tun: Use new macro SOCK_IOC_MAGIC instead of literal number 0x89

2016-10-25 Thread fgao
From: Gao Feng 

The current code uses _IOC_TYPE(cmd) == 0x89 to check whether cmd is a
socket ioctl command such as SIOCGIFHWADDR. But the literal number 0x89 may
confuse readers, so create a macro SOCK_IOC_MAGIC, like SPI_IOC_MAGIC, to
enhance readability.

Signed-off-by: Gao Feng 
---
 drivers/net/tun.c| 2 +-
 include/uapi/linux/sockios.h | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 9328568..9efd185 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1985,7 +1985,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned 
int cmd,
int le;
int ret;
 
-   if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == 0x89) {
+   if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == 
SOCK_IOC_MAGIC) {
if (copy_from_user(, argp, ifreq_len))
return -EFAULT;
} else {
diff --git a/include/uapi/linux/sockios.h b/include/uapi/linux/sockios.h
index 8e7890b..b8f42f2 100644
--- a/include/uapi/linux/sockios.h
+++ b/include/uapi/linux/sockios.h
@@ -24,6 +24,8 @@
 #define SIOCINQFIONREAD
 #define SIOCOUTQ   TIOCOUTQ/* output queue size (not sent + not 
acked) */
 
+#define SOCK_IOC_MAGIC 0x89
+
 /* Routing table calls. */
 #define SIOCADDRT  0x890B  /* add routing table entry  */
 #define SIOCDELRT  0x890C  /* delete routing table entry   */
-- 
1.9.1




[PATCH v2 net-next 1/1] driver: tun: Forbid to set IFF_TUN and IFF_TAP at the same time

2016-10-21 Thread fgao
From: Gao Feng 

The current tun driver permits ifr_flags to be set with IFF_TUN and
IFF_TAP at the same time, but in that case only the IFF_TUN flag takes
effect. It does not make sense to set both flags, so add a check that
rejects this combination.

Signed-off-by: Gao Feng 
---
 v2: Remove useless {}
 v1: Initial patch

 drivers/net/tun.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 8093e39..faaa189 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1752,6 +1752,9 @@ static int tun_set_iff(struct net *net, struct file 
*file, struct ifreq *ifr)
if (err < 0)
return err;
 
+   if ((ifr->ifr_flags & (IFF_TUN | IFF_TAP)) == (IFF_TUN | 
IFF_TAP))
+   return -EINVAL;
+
/* Set dev type */
if (ifr->ifr_flags & IFF_TUN) {
/* TUN device */
-- 
1.9.1




[PATCH net-next 1/1] driver: tun: Forbid to set IFF_TUN and IFF_TAP at the same time

2016-10-21 Thread fgao
From: Gao Feng 

The current tun driver permits ifr_flags to be set with IFF_TUN and
IFF_TAP at the same time, but in that case only the IFF_TUN flag takes
effect. It does not make sense to set both flags, so add a check that
rejects this combination.

Signed-off-by: Gao Feng 
---
 drivers/net/tun.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 8093e39..c1f89c1 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1752,6 +1752,10 @@ static int tun_set_iff(struct net *net, struct file 
*file, struct ifreq *ifr)
if (err < 0)
return err;
 
+   if ((ifr->ifr_flags & (IFF_TUN | IFF_TAP)) == (IFF_TUN | 
IFF_TAP)) {
+   return -EINVAL;
+   }
+
/* Set dev type */
if (ifr->ifr_flags & IFF_TUN) {
/* TUN device */
-- 
1.9.1




[PATCH net-next 1/1] net: vlan: Use sizeof instead of literal number

2016-10-17 Thread fgao
From: Gao Feng 

Use sizeof variable instead of literal number to enhance the readability.

Signed-off-by: Gao Feng 
---
 net/8021q/vlan.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 8de138d..5a3903b 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -515,8 +515,8 @@ static int vlan_ioctl_handler(struct net *net, void __user 
*arg)
return -EFAULT;
 
/* Null terminate this sucker, just in case. */
-   args.device1[23] = 0;
-   args.u.device2[23] = 0;
+   args.device1[sizeof(args.device1) - 1] = 0;
+   args.u.device2[sizeof(args.u.device2) - 1] = 0;
 
rtnl_lock();
 
-- 
1.9.1




[PATCH 1/1] net: vlan: Use sizeof instead of literal number

2016-10-17 Thread fgao
From: Gao Feng 

Use sizeof variable instead of literal number to enhance the readability.

Signed-off-by: Gao Feng 
---
 net/8021q/vlan.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 8de138d..5a3903b 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -515,8 +515,8 @@ static int vlan_ioctl_handler(struct net *net, void __user 
*arg)
return -EFAULT;
 
/* Null terminate this sucker, just in case. */
-   args.device1[23] = 0;
-   args.u.device2[23] = 0;
+   args.device1[sizeof(args.device1) - 1] = 0;
+   args.u.device2[sizeof(args.u.device2) - 1] = 0;
 
rtnl_lock();
 
-- 
1.9.1




[PATCH nf-next] netfilter: xt_osf: Use explicit member assignment to avoid implicit no padding rule

2016-09-26 Thread fgao
From: Gao Feng 

The current xt_osf code uses memcmp to check whether two user fingers are
the same, so it depends on struct xt_osf_user_finger having no padding.
That is an implicit rule, and it is hard to maintain.

Now zero the memory and assign the members explicitly.

Signed-off-by: Gao Feng 
---
 net/netfilter/xt_osf.c | 32 ++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 2455b69..9793670 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -61,6 +61,34 @@ static const struct nla_policy xt_osf_policy[OSF_ATTR_MAX + 
1] = {
[OSF_ATTR_FINGER]   = { .len = sizeof(struct xt_osf_user_finger) },
 };
 
+static void copy_user_finger(struct xt_osf_user_finger *dst,
+const struct xt_osf_user_finger *src)
+{
+#define OSF_COPY_MEMBER(mem)   dst->mem = src->mem
+
+   int i;
+
+   OSF_COPY_MEMBER(wss.wc);
+   OSF_COPY_MEMBER(wss.val);
+
+   OSF_COPY_MEMBER(ttl);
+   OSF_COPY_MEMBER(df);
+   OSF_COPY_MEMBER(ss);
+   OSF_COPY_MEMBER(mss);
+   OSF_COPY_MEMBER(opt_num);
+
+   memcpy(dst->genre, src->genre, sizeof(dst->genre));
+   memcpy(dst->version, src->version, sizeof(dst->version));
+   memcpy(dst->subtype, src->subtype, sizeof(dst->subtype));
+
+   for (i = 0; i < MAX_IPOPTLEN; ++i) {
+   OSF_COPY_MEMBER(opt[i].kind);
+   OSF_COPY_MEMBER(opt[i].length);
+   OSF_COPY_MEMBER(opt[i].wc.wc);
+   OSF_COPY_MEMBER(opt[i].wc.val);
+   }
+}
+
 static int xt_osf_add_callback(struct net *net, struct sock *ctnl,
   struct sk_buff *skb, const struct nlmsghdr *nlh,
   const struct nlattr * const osf_attrs[])
@@ -77,11 +105,11 @@ static int xt_osf_add_callback(struct net *net, struct 
sock *ctnl,
 
f = nla_data(osf_attrs[OSF_ATTR_FINGER]);
 
-   kf = kmalloc(sizeof(struct xt_osf_finger), GFP_KERNEL);
+   kf = kzalloc(sizeof(*kf), GFP_KERNEL);
if (!kf)
return -ENOMEM;
 
-   memcpy(>finger, f, sizeof(struct xt_osf_user_finger));
+   copy_user_finger(>finger, f);
 
list_for_each_entry(sf, _osf_fingers[!!f->df], finger_entry) {
if (memcmp(>finger, f, sizeof(struct xt_osf_user_finger)))
-- 
1.9.1




[PATCH nf v5] netfilter: seqadj: Fix the wrong ack adjust for the RST packet without ack

2016-09-22 Thread fgao
From: Gao Feng 

A TCP RST packet that does not set the ack flag, and whose ack number
bytes are zero, is valid. But the current seqadj code would adjust the "0"
ack to an invalid ack number. seqadj needs to check the ack flag before
adjusting the ack number for these RST packets.

The following is my test case

client is 10.26.98.245, and add one iptable rule:
iptables  -I INPUT -p tcp --sport 12345 -m connbytes --connbytes 2:
--connbytes-dir reply --connbytes-mode packets -j REJECT --reject-with
tcp-reset
This iptables rule could generate one TCP RST without the ack flag.

server:10.172.135.55
Enable the synproxy with seqadjust by the following iptables rules
iptables -t raw -A PREROUTING -i eth0 -p tcp -d 10.172.135.55 --dport 12345
-m tcp --syn -j CT --notrack

iptables -A INPUT -i eth0 -p tcp -d 10.172.135.55 --dport 12345 -m conntrack
--ctstate INVALID,UNTRACKED -j SYNPROXY --sack-perm --timestamp --wscale 7
--mss 1460
iptables -A OUTPUT -o eth0 -p tcp -s 10.172.135.55 --sport 12345 -m conntrack
--ctstate INVALID,UNTRACKED -m tcp --tcp-flags SYN,RST,ACK SYN,ACK -j ACCEPT

The following is my test result.

1. packet trace on client
root@routers:/tmp# tcpdump -i eth0 tcp port 12345 -n
tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
listening on eth0, link-type EN10MB (Ethernet), capture size 65535 bytes
IP 10.26.98.245.45154 > 10.172.135.55.12345: Flags [S], seq 3695959829,
win 29200, options [mss 1460,sackOK,TS val 452367884 ecr 0,nop,wscale 7],
length 0
IP 10.172.135.55.12345 > 10.26.98.245.45154: Flags [S.], seq 546723266,
ack 3695959830, win 0, options [mss 1460,sackOK,TS val 15643479 ecr 452367884,
nop,wscale 7], length 0
IP 10.26.98.245.45154 > 10.172.135.55.12345: Flags [.], ack 1, win 229,
options [nop,nop,TS val 452367885 ecr 15643479], length 0
IP 10.172.135.55.12345 > 10.26.98.245.45154: Flags [.], ack 1, win 226,
options [nop,nop,TS val 15643479 ecr 452367885], length 0
IP 10.26.98.245.45154 > 10.172.135.55.12345: Flags [R], seq 3695959830,
win 0, length 0

2. seqadj log on server
[62873.867319] Adjusting sequence number from 602341895->546723267,
ack from 3695959830->3695959830
[62873.867644] Adjusting sequence number from 602341895->546723267,
ack from 3695959830->3695959830
[62873.869040] Adjusting sequence number from 3695959830->3695959830,
ack from 0->55618628

To summarize, it is clear that the seqadj code adjusts the 0 ack when it
receives a TCP RST packet without ack.

Signed-off-by: Gao Feng 
---
 v5: Use goto to decrease the patch size
 v4: Don't invoke nf_ct_sack_adjust when no ack flag
 v3: Add the reproduce steps and packet trace
 v2: Regenerate because the first patch is removed
 v1: Initial patch

 net/netfilter/nf_conntrack_seqadj.c | 20 
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/net/netfilter/nf_conntrack_seqadj.c 
b/net/netfilter/nf_conntrack_seqadj.c
index dff0f0c..08d0640 100644
--- a/net/netfilter/nf_conntrack_seqadj.c
+++ b/net/netfilter/nf_conntrack_seqadj.c
@@ -169,7 +169,7 @@ int nf_ct_seq_adjust(struct sk_buff *skb,
s32 seqoff, ackoff;
struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
struct nf_ct_seqadj *this_way, *other_way;
-   int res;
+   int res = 1;
 
this_way  = >seq[dir];
other_way = >seq[!dir];
@@ -184,27 +184,31 @@ int nf_ct_seq_adjust(struct sk_buff *skb,
else
seqoff = this_way->offset_before;
 
+   newseq = htonl(ntohl(tcph->seq) + seqoff);
+   inet_proto_csum_replace4(>check, skb, tcph->seq, newseq, false);
+   pr_debug("Adjusting sequence number from %u->%u\n",
+ntohl(tcph->seq), ntohl(newseq));
+   tcph->seq = newseq;
+
+   if (unlikely(!tcph->ack))
+   goto out;
+
if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
  other_way->correction_pos))
ackoff = other_way->offset_after;
else
ackoff = other_way->offset_before;
 
-   newseq = htonl(ntohl(tcph->seq) + seqoff);
newack = htonl(ntohl(tcph->ack_seq) - ackoff);
-
-   inet_proto_csum_replace4(>check, skb, tcph->seq, newseq, false);
inet_proto_csum_replace4(>check, skb, tcph->ack_seq, newack,
 false);
-
-   pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
+   pr_debug("Adjusting ack number from %u->%u, ack from %u->%u\n",
 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
 ntohl(newack));
-
-   tcph->seq = newseq;
tcph->ack_seq = newack;
 
res = nf_ct_sack_adjust(skb, protoff, tcph, ct, ctinfo);
+out:
spin_unlock_bh(>lock);
 
return res;
-- 
1.9.1



[PATCH nf v4] netfilter: seqadj: Fix the wrong ack adjust for the RST packet without ack

2016-09-22 Thread fgao
From: Gao Feng 

A TCP RST packet that does not set the ack flag, and whose ack number
bytes are zero, is valid. But the current seqadj code would adjust the "0"
ack to an invalid ack number. seqadj needs to check the ack flag before
adjusting the ack number for these RST packets.

The following is my test case

client is 10.26.98.245, and add one iptable rule:
iptables  -I INPUT -p tcp --sport 12345 -m connbytes --connbytes 2:
--connbytes-dir reply --connbytes-mode packets -j REJECT --reject-with
tcp-reset
This iptables rule could generate one TCP RST without the ack flag.

server:10.172.135.55
Enable the synproxy with seqadjust by the following iptables rules
iptables -t raw -A PREROUTING -i eth0 -p tcp -d 10.172.135.55 --dport 12345
-m tcp --syn -j CT --notrack

iptables -A INPUT -i eth0 -p tcp -d 10.172.135.55 --dport 12345 -m conntrack
--ctstate INVALID,UNTRACKED -j SYNPROXY --sack-perm --timestamp --wscale 7
--mss 1460
iptables -A OUTPUT -o eth0 -p tcp -s 10.172.135.55 --sport 12345 -m conntrack
--ctstate INVALID,UNTRACKED -m tcp --tcp-flags SYN,RST,ACK SYN,ACK -j ACCEPT

The following is my test result.

1. packet trace on client
root@routers:/tmp# tcpdump -i eth0 tcp port 12345 -n
tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
listening on eth0, link-type EN10MB (Ethernet), capture size 65535 bytes
IP 10.26.98.245.45154 > 10.172.135.55.12345: Flags [S], seq 3695959829,
win 29200, options [mss 1460,sackOK,TS val 452367884 ecr 0,nop,wscale 7],
length 0
IP 10.172.135.55.12345 > 10.26.98.245.45154: Flags [S.], seq 546723266,
ack 3695959830, win 0, options [mss 1460,sackOK,TS val 15643479 ecr 452367884,
nop,wscale 7], length 0
IP 10.26.98.245.45154 > 10.172.135.55.12345: Flags [.], ack 1, win 229,
options [nop,nop,TS val 452367885 ecr 15643479], length 0
IP 10.172.135.55.12345 > 10.26.98.245.45154: Flags [.], ack 1, win 226,
options [nop,nop,TS val 15643479 ecr 452367885], length 0
IP 10.26.98.245.45154 > 10.172.135.55.12345: Flags [R], seq 3695959830,
win 0, length 0

2. seqadj log on server
[62873.867319] Adjusting sequence number from 602341895->546723267,
ack from 3695959830->3695959830
[62873.867644] Adjusting sequence number from 602341895->546723267,
ack from 3695959830->3695959830
[62873.869040] Adjusting sequence number from 3695959830->3695959830,
ack from 0->55618628

To summarize, it is clear that the seqadj code adjusts the 0 ack when it
receives a TCP RST packet without ack.

Signed-off-by: Gao Feng 
---
 v4: Don't invoke nf_ct_sack_adjust when no ack flag
 v3: Add the reproduce steps and packet trace
 v2: Regenerate because the first patch is removed
 v1: Initial patch

 net/netfilter/nf_conntrack_seqadj.c | 37 -
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/net/netfilter/nf_conntrack_seqadj.c 
b/net/netfilter/nf_conntrack_seqadj.c
index dff0f0c..80ab429 100644
--- a/net/netfilter/nf_conntrack_seqadj.c
+++ b/net/netfilter/nf_conntrack_seqadj.c
@@ -169,7 +169,7 @@ int nf_ct_seq_adjust(struct sk_buff *skb,
s32 seqoff, ackoff;
struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
struct nf_ct_seqadj *this_way, *other_way;
-   int res;
+   int res = 1;
 
this_way  = >seq[dir];
other_way = >seq[!dir];
@@ -184,27 +184,30 @@ int nf_ct_seq_adjust(struct sk_buff *skb,
else
seqoff = this_way->offset_before;
 
-   if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
- other_way->correction_pos))
-   ackoff = other_way->offset_after;
-   else
-   ackoff = other_way->offset_before;
-
newseq = htonl(ntohl(tcph->seq) + seqoff);
-   newack = htonl(ntohl(tcph->ack_seq) - ackoff);
-
inet_proto_csum_replace4(>check, skb, tcph->seq, newseq, false);
-   inet_proto_csum_replace4(>check, skb, tcph->ack_seq, newack,
-false);
+   pr_debug("Adjusting sequence number from %u->%u\n",
+ntohl(tcph->seq), ntohl(newseq));
+   tcph->seq = newseq;
 
-   pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
-ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
-ntohl(newack));
+   if (likely(tcph->ack)) {
+   if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
+ other_way->correction_pos))
+   ackoff = other_way->offset_after;
+   else
+   ackoff = other_way->offset_before;
 
-   tcph->seq = newseq;
-   tcph->ack_seq = newack;
+   newack = htonl(ntohl(tcph->ack_seq) - ackoff);
+   inet_proto_csum_replace4(>check, skb, tcph->ack_seq,
+newack, false);
+   pr_debug("Adjusting ack number from %u->%u, ack from %u->%u\n",
+ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),

[PATCH nf v3] netfilter: seqadj: Fix the wrong ack adjust for the RST packet without ack

2016-09-21 Thread fgao
From: Gao Feng 

A TCP RST packet that does not set the ack flag, and whose ack number
bytes are zero, is valid. But the current seqadj code would adjust the "0"
ack to an invalid ack number. seqadj needs to check the ack flag before
adjusting the ack number for these RST packets.

The following is my test case

client is 10.26.98.245, and add one iptable rule:
iptables  -I INPUT -p tcp --sport 12345 -m connbytes --connbytes 2:
--connbytes-dir reply --connbytes-mode packets -j REJECT --reject-with
tcp-reset
This iptables rule could generate one TCP RST without the ack flag.

server:10.172.135.55
Enable the synproxy with seqadjust by the following iptables rules
iptables -t raw -A PREROUTING -i eth0 -p tcp -d 10.172.135.55 --dport 12345
-m tcp --syn -j CT --notrack

iptables -A INPUT -i eth0 -p tcp -d 10.172.135.55 --dport 12345 -m conntrack
--ctstate INVALID,UNTRACKED -j SYNPROXY --sack-perm --timestamp --wscale 7
--mss 1460
iptables -A OUTPUT -o eth0 -p tcp -s 10.172.135.55 --sport 12345 -m conntrack
--ctstate INVALID,UNTRACKED -m tcp --tcp-flags SYN,RST,ACK SYN,ACK -j ACCEPT

The following is my test result.

1. packet trace on client
root@routers:/tmp# tcpdump -i eth0 tcp port 12345 -n
tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
listening on eth0, link-type EN10MB (Ethernet), capture size 65535 bytes
IP 10.26.98.245.45154 > 10.172.135.55.12345: Flags [S], seq 3695959829,
win 29200, options [mss 1460,sackOK,TS val 452367884 ecr 0,nop,wscale 7],
length 0
IP 10.172.135.55.12345 > 10.26.98.245.45154: Flags [S.], seq 546723266,
ack 3695959830, win 0, options [mss 1460,sackOK,TS val 15643479 ecr 452367884,
nop,wscale 7], length 0
IP 10.26.98.245.45154 > 10.172.135.55.12345: Flags [.], ack 1, win 229,
options [nop,nop,TS val 452367885 ecr 15643479], length 0
IP 10.172.135.55.12345 > 10.26.98.245.45154: Flags [.], ack 1, win 226,
options [nop,nop,TS val 15643479 ecr 452367885], length 0
IP 10.26.98.245.45154 > 10.172.135.55.12345: Flags [R], seq 3695959830,
win 0, length 0

2. seqadj log on server
[62873.867319] Adjusting sequence number from 602341895->546723267,
ack from 3695959830->3695959830
[62873.867644] Adjusting sequence number from 602341895->546723267,
ack from 3695959830->3695959830
[62873.869040] Adjusting sequence number from 3695959830->3695959830,
ack from 0->55618628

To summarize, it is clear that the seqadj code adjusts the 0 ack when it
receives a TCP RST packet without ack.

Signed-off-by: Gao Feng 
---
 v3: Add the reproduce steps and packet trace
 v2: Regenerate because the first patch is removed
 v1: Initial patch

 net/netfilter/nf_conntrack_seqadj.c | 34 +++---
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/net/netfilter/nf_conntrack_seqadj.c 
b/net/netfilter/nf_conntrack_seqadj.c
index dff0f0c..3bd9c7e 100644
--- a/net/netfilter/nf_conntrack_seqadj.c
+++ b/net/netfilter/nf_conntrack_seqadj.c
@@ -179,30 +179,34 @@ int nf_ct_seq_adjust(struct sk_buff *skb,
 
tcph = (void *)skb->data + protoff;
spin_lock_bh(>lock);
+
if (after(ntohl(tcph->seq), this_way->correction_pos))
seqoff = this_way->offset_after;
else
seqoff = this_way->offset_before;
 
-   if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
- other_way->correction_pos))
-   ackoff = other_way->offset_after;
-   else
-   ackoff = other_way->offset_before;
-
newseq = htonl(ntohl(tcph->seq) + seqoff);
-   newack = htonl(ntohl(tcph->ack_seq) - ackoff);
-
inet_proto_csum_replace4(>check, skb, tcph->seq, newseq, false);
-   inet_proto_csum_replace4(>check, skb, tcph->ack_seq, newack,
-false);
-
-   pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
-ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
-ntohl(newack));
 
+   pr_debug("Adjusting sequence number from %u->%u\n",
+ntohl(tcph->seq), ntohl(newseq));
tcph->seq = newseq;
-   tcph->ack_seq = newack;
+
+   if (likely(tcph->ack)) {
+   if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
+ other_way->correction_pos))
+   ackoff = other_way->offset_after;
+   else
+   ackoff = other_way->offset_before;
+
+   newack = htonl(ntohl(tcph->ack_seq) - ackoff);
+   inet_proto_csum_replace4(>check, skb, tcph->ack_seq,
+newack, false);
+
+   pr_debug("Adjusting ack number from %u->%u\n",
+ntohl(tcph->ack_seq), ntohl(newack));
+   tcph->ack_seq = newack;
+   }
 
res = nf_ct_sack_adjust(skb, protoff, tcph, ct, ctinfo);
spin_unlock_bh(>lock);
-- 
1.9.1



[PATCH v5 nf] netfilter: seqadj: Drop the packet directly when fail to add seqadj extension to avoid dereference NULL pointer later

2016-09-06 Thread fgao
From: Gao Feng 

When memory is exhausted, nfct_seqadj_ext_add may fail to add the seqadj
extension. But the function nf_ct_seqadj_init doesn't check whether
nfct_seqadj returned a valid seqadj pointer.

Now drop the packet directly when adding the seqadj extension fails, to
avoid dereferencing a NULL pointer in nf_ct_seqadj_init.

Signed-off-by: Gao Feng 
---
 v5: Return NF_ACCEPT instead of NF_DROP when nfct_seqadj_ext_add failed in 
nf_nat_setup_info
 v4: Drop the packet directly when fail to add seqadj extension;
 v3: Remove the warning log when seqadj is null;
 v2: Remove the unnecessary seqadj check in nf_ct_seq_adjust
 v1: Initial patch

 net/netfilter/nf_conntrack_core.c | 6 +-
 net/netfilter/nf_nat_core.c   | 3 ++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nf_conntrack_core.c 
b/net/netfilter/nf_conntrack_core.c
index dd2c43a..dfa76ce 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1036,7 +1036,11 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
return (struct nf_conntrack_tuple_hash *)ct;
 
if (tmpl && nfct_synproxy(tmpl)) {
-   nfct_seqadj_ext_add(ct);
+   if (!nfct_seqadj_ext_add(ct)) {
+   nf_conntrack_free(ct);
+   pr_debug("Can't add seqadj extension\n");
+   return NULL;
+   }
nfct_synproxy_ext_add(ct);
}
 
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index de31818..f8b916a 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -441,7 +441,8 @@ nf_nat_setup_info(struct nf_conn *ct,
ct->status |= IPS_DST_NAT;
 
if (nfct_help(ct))
-   nfct_seqadj_ext_add(ct);
+   if (!nfct_seqadj_ext_add(ct))
+   return NF_ACCEPT;
}
 
if (maniptype == NF_NAT_MANIP_SRC) {
-- 
1.9.1




[PATCH nf] netfilter: seqadj: Fix the wrong ack adjust for the RST packet without ack

2016-09-05 Thread fgao
From: Gao Feng 

A TCP RST packet that does not set the ack flag, and whose ack number
bytes are zero, is valid. For these RST packets, seqadj must not adjust
the ack number.

Signed-off-by: Gao Feng 
---
 v2: Regenerate because the first patch is removed
 v1: Initial patch

 net/netfilter/nf_conntrack_seqadj.c | 34 +++---
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/net/netfilter/nf_conntrack_seqadj.c 
b/net/netfilter/nf_conntrack_seqadj.c
index dff0f0c..3bd9c7e 100644
--- a/net/netfilter/nf_conntrack_seqadj.c
+++ b/net/netfilter/nf_conntrack_seqadj.c
@@ -179,30 +179,34 @@ int nf_ct_seq_adjust(struct sk_buff *skb,
 
tcph = (void *)skb->data + protoff;
spin_lock_bh(>lock);
+
if (after(ntohl(tcph->seq), this_way->correction_pos))
seqoff = this_way->offset_after;
else
seqoff = this_way->offset_before;
 
-   if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
- other_way->correction_pos))
-   ackoff = other_way->offset_after;
-   else
-   ackoff = other_way->offset_before;
-
newseq = htonl(ntohl(tcph->seq) + seqoff);
-   newack = htonl(ntohl(tcph->ack_seq) - ackoff);
-
inet_proto_csum_replace4(>check, skb, tcph->seq, newseq, false);
-   inet_proto_csum_replace4(>check, skb, tcph->ack_seq, newack,
-false);
-
-   pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
-ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
-ntohl(newack));
 
+   pr_debug("Adjusting sequence number from %u->%u\n",
+ntohl(tcph->seq), ntohl(newseq));
tcph->seq = newseq;
-   tcph->ack_seq = newack;
+
+   if (likely(tcph->ack)) {
+   if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
+ other_way->correction_pos))
+   ackoff = other_way->offset_after;
+   else
+   ackoff = other_way->offset_before;
+
+   newack = htonl(ntohl(tcph->ack_seq) - ackoff);
+   inet_proto_csum_replace4(>check, skb, tcph->ack_seq,
+newack, false);
+
+   pr_debug("Adjusting ack number from %u->%u\n",
+ntohl(tcph->ack_seq), ntohl(newack));
+   tcph->ack_seq = newack;
+   }
 
res = nf_ct_sack_adjust(skb, protoff, tcph, ct, ctinfo);
spin_unlock_bh(>lock);
-- 
1.9.1




[PATCH v4 nf] netfilter: seqadj: Drop the packet directly when fail to add seqadj extension to avoid dereference NULL pointer later

2016-09-05 Thread fgao
From: Gao Feng 

When memory is exhausted, nfct_seqadj_ext_add may fail to add the seqadj
extension. But the function nf_ct_seqadj_init doesn't check whether
nfct_seqadj returned a valid seqadj pointer.

Now drop the packet directly when adding the seqadj extension fails, to
avoid dereferencing a NULL pointer in nf_ct_seqadj_init.

Signed-off-by: Gao Feng 
---
 v4: Drop the packet directly when fail to add seqadj extension;
 v3: Remove the warning log when seqadj is null;
 v2: Remove the unnecessary seqadj check in nf_ct_seq_adjust
 v1: Initial patch

 net/netfilter/nf_conntrack_core.c | 6 +-
 net/netfilter/nf_nat_core.c   | 3 ++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nf_conntrack_core.c 
b/net/netfilter/nf_conntrack_core.c
index dd2c43a..dfa76ce 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1036,7 +1036,11 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
return (struct nf_conntrack_tuple_hash *)ct;
 
if (tmpl && nfct_synproxy(tmpl)) {
-   nfct_seqadj_ext_add(ct);
+   if (!nfct_seqadj_ext_add(ct)) {
+   nf_conntrack_free(ct);
+   pr_debug("Can't add seqadj extension\n");
+   return NULL;
+   }
nfct_synproxy_ext_add(ct);
}
 
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index de31818..b82282a 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -441,7 +441,8 @@ nf_nat_setup_info(struct nf_conn *ct,
ct->status |= IPS_DST_NAT;
 
if (nfct_help(ct))
-   nfct_seqadj_ext_add(ct);
+   if (!nfct_seqadj_ext_add(ct))
+   return NF_DROP;
}
 
if (maniptype == NF_NAT_MANIP_SRC) {
-- 
1.9.1




[PATCH 2/2 nf] netfilter: seqadj: Fix the wrong ack adjust for the RST packet without ack

2016-09-05 Thread fgao
From: Gao Feng 

A TCP RST packet that does not set the ack flag, and whose ack number
bytes are zero, is valid. For these RST packets, seqadj must not adjust
the ack number.

Signed-off-by: Gao Feng 
---
 net/netfilter/nf_conntrack_seqadj.c | 34 +++---
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/net/netfilter/nf_conntrack_seqadj.c 
b/net/netfilter/nf_conntrack_seqadj.c
index 7f8d814..65bb4a6 100644
--- a/net/netfilter/nf_conntrack_seqadj.c
+++ b/net/netfilter/nf_conntrack_seqadj.c
@@ -182,30 +182,34 @@ int nf_ct_seq_adjust(struct sk_buff *skb,
 
tcph = (void *)skb->data + protoff;
spin_lock_bh(>lock);
+
if (after(ntohl(tcph->seq), this_way->correction_pos))
seqoff = this_way->offset_after;
else
seqoff = this_way->offset_before;
 
-   if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
- other_way->correction_pos))
-   ackoff = other_way->offset_after;
-   else
-   ackoff = other_way->offset_before;
-
newseq = htonl(ntohl(tcph->seq) + seqoff);
-   newack = htonl(ntohl(tcph->ack_seq) - ackoff);
-
inet_proto_csum_replace4(>check, skb, tcph->seq, newseq, false);
-   inet_proto_csum_replace4(>check, skb, tcph->ack_seq, newack,
-false);
-
-   pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
-ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
-ntohl(newack));
 
+   pr_debug("Adjusting sequence number from %u->%u\n",
+ntohl(tcph->seq), ntohl(newseq));
tcph->seq = newseq;
-   tcph->ack_seq = newack;
+
+   if (likely(tcph->ack)) {
+   if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
+ other_way->correction_pos))
+   ackoff = other_way->offset_after;
+   else
+   ackoff = other_way->offset_before;
+
+   newack = htonl(ntohl(tcph->ack_seq) - ackoff);
+   inet_proto_csum_replace4(>check, skb, tcph->ack_seq,
+newack, false);
+
+   pr_debug("Adjusting ack number from %u->%u\n",
+ntohl(tcph->ack_seq), ntohl(newack));
+   tcph->ack_seq = newack;
+   }
 
res = nf_ct_sack_adjust(skb, protoff, tcph, ct, ctinfo);
spin_unlock_bh(>lock);
-- 
1.9.1




[PATCH v3 nf] netfilter: seqadj: Fix one possible panic in seqadj when mem is exhausted

2016-09-03 Thread fgao
From: Gao Feng 

When memory is exhausted, nfct_seqadj_ext_add may fail to add the seqadj
extension. But the function nf_ct_seqadj_init doesn't check whether
nfct_seqadj returned a valid seqadj pointer, while other functions perform
that sanity check.

So the system would panic when nfct_seqadj_ext_add fails.

Signed-off-by: Gao Feng 
---
 v3: Remove the warning log when seqadj is null;
 v2: Remove the unnecessary seqadj check in nf_ct_seq_adjust
 v1: Initial patch

 net/netfilter/nf_conntrack_seqadj.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_conntrack_seqadj.c 
b/net/netfilter/nf_conntrack_seqadj.c
index dff0f0c..7f8d814 100644
--- a/net/netfilter/nf_conntrack_seqadj.c
+++ b/net/netfilter/nf_conntrack_seqadj.c
@@ -16,9 +16,12 @@ int nf_ct_seqadj_init(struct nf_conn *ct, enum 
ip_conntrack_info ctinfo,
if (off == 0)
return 0;
 
+   seqadj = nfct_seqadj(ct);
+   if (unlikely(!seqadj))
+   return 0;
+
set_bit(IPS_SEQ_ADJUST_BIT, >status);
 
-   seqadj = nfct_seqadj(ct);
this_way = >seq[dir];
this_way->offset_before  = off;
this_way->offset_after   = off;
-- 
1.9.1




[PATCH v2 1/2 nf] netfilter: seqadj: Fix one possible panic in seqadj when mem is exhausted

2016-09-01 Thread fgao
From: Gao Feng 

When memory is exhausted, nfct_seqadj_ext_add may fail to add the seqadj
extension. But the function nf_ct_seqadj_init doesn't check whether
nfct_seqadj returned a valid seqadj pointer, while other functions perform
that sanity check.

So the system would panic when nfct_seqadj_ext_add fails.

Signed-off-by: Gao Feng 
---
 v2: Remove the unnecessary seqadj check in nf_ct_seq_adjust
 v1: Initial patch

 net/netfilter/nf_conntrack_seqadj.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_conntrack_seqadj.c 
b/net/netfilter/nf_conntrack_seqadj.c
index dff0f0c..2c8e201 100644
--- a/net/netfilter/nf_conntrack_seqadj.c
+++ b/net/netfilter/nf_conntrack_seqadj.c
@@ -16,9 +16,14 @@ int nf_ct_seqadj_init(struct nf_conn *ct, enum 
ip_conntrack_info ctinfo,
if (off == 0)
return 0;
 
+   seqadj = nfct_seqadj(ct);
+   if (unlikely(!seqadj)) {
+   WARN_ONCE(1, "Missing nfct_seqadj_ext_add() setup call\n");
+   return 0;
+   }
+
set_bit(IPS_SEQ_ADJUST_BIT, >status);
 
-   seqadj = nfct_seqadj(ct);
this_way = >seq[dir];
this_way->offset_before  = off;
this_way->offset_after   = off;
-- 
1.9.1




[PATCH 2/2 nf-next] netfilter: seqadj: print the warning log when fail to add seqadj extension

2016-09-01 Thread fgao
From: Gao Feng 

Print a warning log when adding the seqadj extension fails, as
nf_ct_acct_ext_add does. This can help in finding the problem.

Signed-off-by: Gao Feng 
---
 include/net/netfilter/nf_conntrack_seqadj.h | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/net/netfilter/nf_conntrack_seqadj.h 
b/include/net/netfilter/nf_conntrack_seqadj.h
index 4b33629..d548b9c 100644
--- a/include/net/netfilter/nf_conntrack_seqadj.h
+++ b/include/net/netfilter/nf_conntrack_seqadj.h
@@ -27,7 +27,13 @@ static inline struct nf_conn_seqadj *nfct_seqadj(const 
struct nf_conn *ct)
 
 static inline struct nf_conn_seqadj *nfct_seqadj_ext_add(struct nf_conn *ct)
 {
-   return nf_ct_ext_add(ct, NF_CT_EXT_SEQADJ, GFP_ATOMIC);
+   struct nf_conn_seqadj *seqadj = nf_ct_ext_add(ct, NF_CT_EXT_SEQADJ,
+ GFP_ATOMIC);
+
+   if (!seqadj)
+   pr_warn("failed to add seqadj extension area");
+
+   return seqadj;
 }
 
 int nf_ct_seqadj_init(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
-- 
1.9.1




[PATCH 1/2 nf] netfilter: seqadj: Fix some possible panics of seqadj when mem is exhausted

2016-09-01 Thread fgao
From: Gao Feng 

When memory is exhausted, nfct_seqadj_ext_add may fail to add the seqadj
extension. But the interface functions nf_ct_seqadj_init and
nf_ct_seq_adjust don't check whether nfct_seqadj returned a valid seqadj
pointer, while nf_ct_seqadj_set and nf_ct_seq_offset perform that
check. So the system would panic when nfct_seqadj_ext_add fails.

Signed-off-by: Gao Feng 
---
 net/netfilter/nf_conntrack_seqadj.c | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nf_conntrack_seqadj.c 
b/net/netfilter/nf_conntrack_seqadj.c
index dff0f0c..2a2fd0e 100644
--- a/net/netfilter/nf_conntrack_seqadj.c
+++ b/net/netfilter/nf_conntrack_seqadj.c
@@ -16,9 +16,14 @@ int nf_ct_seqadj_init(struct nf_conn *ct, enum 
ip_conntrack_info ctinfo,
if (off == 0)
return 0;
 
+   seqadj = nfct_seqadj(ct);
+   if (unlikely(!seqadj)) {
+   WARN_ONCE(1, "Missing nfct_seqadj_ext_add() setup call\n");
+   return 0;
+   }
+
set_bit(IPS_SEQ_ADJUST_BIT, >status);
 
-   seqadj = nfct_seqadj(ct);
this_way = >seq[dir];
this_way->offset_before  = off;
this_way->offset_after   = off;
@@ -171,6 +176,11 @@ int nf_ct_seq_adjust(struct sk_buff *skb,
struct nf_ct_seqadj *this_way, *other_way;
int res;
 
+   if (unlikely(!seqadj)) {
+   WARN_ONCE(1, "Missing nfct_seqadj_ext_add() setup call\n");
+   return 0;
+   }
+
this_way  = >seq[dir];
other_way = >seq[!dir];
 
@@ -218,8 +228,10 @@ s32 nf_ct_seq_offset(const struct nf_conn *ct,
struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
struct nf_ct_seqadj *this_way;
 
-   if (!seqadj)
+   if (unlikely(!seqadj)) {
+   WARN_ONCE(1, "Missing nfct_seqadj_ext_add() setup call\n");
return 0;
+   }
 
this_way = >seq[dir];
return after(seq, this_way->correction_pos) ?
-- 
1.9.1




[PATCH v2 net] rps: flow_dissector: Fix uninitialized flow_keys used in __skb_get_hash possibly

2016-08-31 Thread fgao
From: Gao Feng 

The original code depends on the function arguments being evaluated from
left to right. But the evaluation order of function arguments is actually
not defined by the C standard.

When flow_keys_have_l4(&keys) is invoked before ___skb_get_hash(skb, &keys,
hashrnd) with some compilers or environments, the keys passed to
flow_keys_have_l4 are not initialized.

Fixes: 6db61d79c1e1 ("flow_dissector: Ignore flow dissector return value from 
___skb_get_hash")

Acked-by: Eric Dumazet 
Signed-off-by: Gao Feng 
---
 v2: Add fixed commit description according to Eric's advice
 v1: Initial patch

 net/core/flow_dissector.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 61ad43f..52742a0 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -680,11 +680,13 @@ EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric);
 void __skb_get_hash(struct sk_buff *skb)
 {
struct flow_keys keys;
+   u32 hash;
 
__flow_hash_secret_init();
 
-   __skb_set_sw_hash(skb, ___skb_get_hash(skb, , hashrnd),
- flow_keys_have_l4());
+   hash = ___skb_get_hash(skb, , hashrnd);
+
+   __skb_set_sw_hash(skb, hash, flow_keys_have_l4());
 }
 EXPORT_SYMBOL(__skb_get_hash);
 
-- 
1.9.1




[PATCH net-next] rps: flow_dissector: Add the const for the parameter of flow_keys_have_l4

2016-08-30 Thread fgao
From: Gao Feng 

Add const to the parameter of flow_keys_have_l4 for readability.

Signed-off-by: Gao Feng 
---
 include/net/flow_dissector.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index f266b51..d953492 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -183,7 +183,7 @@ struct flow_keys_digest {
 void make_flow_keys_digest(struct flow_keys_digest *digest,
   const struct flow_keys *flow);
 
-static inline bool flow_keys_have_l4(struct flow_keys *keys)
+static inline bool flow_keys_have_l4(const struct flow_keys *keys)
 {
return (keys->ports.ports || keys->tags.flow_label);
 }
-- 
1.9.1




[PATCH net] rps: flow_dissector: Fix uninitialized flow_keys used in __skb_get_hash possibly

2016-08-30 Thread fgao
From: Gao Feng 

The original code depends on the function arguments being evaluated from
left to right. But the evaluation order of function arguments is actually
not defined by the C standard.

When flow_keys_have_l4(&keys) is invoked before ___skb_get_hash(skb, &keys,
hashrnd) with some compilers or environments, the keys passed to
flow_keys_have_l4 are not initialized.

Signed-off-by: Gao Feng 
---
 net/core/flow_dissector.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 61ad43f..52742a0 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -680,11 +680,13 @@ EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric);
 void __skb_get_hash(struct sk_buff *skb)
 {
struct flow_keys keys;
+   u32 hash;
 
__flow_hash_secret_init();
 
-   __skb_set_sw_hash(skb, ___skb_get_hash(skb, , hashrnd),
- flow_keys_have_l4());
+   hash = ___skb_get_hash(skb, , hashrnd);
+
+   __skb_set_sw_hash(skb, hash, flow_keys_have_l4());
 }
 EXPORT_SYMBOL(__skb_get_hash);
 
-- 
1.9.1




[PATCH v2 nf] netfilter: log: Check param to avoid overflow in nf_log_set

2016-08-29 Thread fgao
From: Gao Feng 

The nf_log_set is an interface function, so it should do a strict sanity
check of its parameters. Change the return type of nf_log_set from void
to int. When the pf is invalid, return -EOPNOTSUPP.

Signed-off-by: Gao Feng 
---
 v2: Use ARRAY_SIZE(net->nf.nf_loggers) instead of NFPROTO_NUMPROTO;
 Return error code -EOPNOTSUPP when pf is invalid;
 v1: Initial patch

 include/net/netfilter/nf_log.h   | 2 +-
 net/bridge/netfilter/nf_log_bridge.c | 3 +--
 net/ipv4/netfilter/nf_log_arp.c  | 3 +--
 net/ipv4/netfilter/nf_log_ipv4.c | 3 +--
 net/ipv6/netfilter/nf_log_ipv6.c | 3 +--
 net/netfilter/nf_log.c   | 8 +---
 6 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h
index 83d855b..f4eebd0 100644
--- a/include/net/netfilter/nf_log.h
+++ b/include/net/netfilter/nf_log.h
@@ -60,7 +60,7 @@ struct nf_logger {
 int nf_log_register(u_int8_t pf, struct nf_logger *logger);
 void nf_log_unregister(struct nf_logger *logger);
 
-void nf_log_set(struct net *net, u_int8_t pf,
+int nf_log_set(struct net *net, u_int8_t pf,
const struct nf_logger *logger);
 void nf_log_unset(struct net *net, const struct nf_logger *logger);
 
diff --git a/net/bridge/netfilter/nf_log_bridge.c 
b/net/bridge/netfilter/nf_log_bridge.c
index 5d9953a..1663df5 100644
--- a/net/bridge/netfilter/nf_log_bridge.c
+++ b/net/bridge/netfilter/nf_log_bridge.c
@@ -50,8 +50,7 @@ static struct nf_logger nf_bridge_logger __read_mostly = {
 
 static int __net_init nf_log_bridge_net_init(struct net *net)
 {
-   nf_log_set(net, NFPROTO_BRIDGE, _bridge_logger);
-   return 0;
+   return nf_log_set(net, NFPROTO_BRIDGE, _bridge_logger);
 }
 
 static void __net_exit nf_log_bridge_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c
index e7ad950..73599f2 100644
--- a/net/ipv4/netfilter/nf_log_arp.c
+++ b/net/ipv4/netfilter/nf_log_arp.c
@@ -111,8 +111,7 @@ static struct nf_logger nf_arp_logger __read_mostly = {
 
 static int __net_init nf_log_arp_net_init(struct net *net)
 {
-   nf_log_set(net, NFPROTO_ARP, _arp_logger);
-   return 0;
+   return nf_log_set(net, NFPROTO_ARP, _arp_logger);
 }
 
 static void __net_exit nf_log_arp_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c
index 076aadd..20f2255 100644
--- a/net/ipv4/netfilter/nf_log_ipv4.c
+++ b/net/ipv4/netfilter/nf_log_ipv4.c
@@ -347,8 +347,7 @@ static struct nf_logger nf_ip_logger __read_mostly = {
 
 static int __net_init nf_log_ipv4_net_init(struct net *net)
 {
-   nf_log_set(net, NFPROTO_IPV4, _ip_logger);
-   return 0;
+   return nf_log_set(net, NFPROTO_IPV4, _ip_logger);
 }
 
 static void __net_exit nf_log_ipv4_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
index 8dd8696..c1bcf69 100644
--- a/net/ipv6/netfilter/nf_log_ipv6.c
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -379,8 +379,7 @@ static struct nf_logger nf_ip6_logger __read_mostly = {
 
 static int __net_init nf_log_ipv6_net_init(struct net *net)
 {
-   nf_log_set(net, NFPROTO_IPV6, _ip6_logger);
-   return 0;
+   return nf_log_set(net, NFPROTO_IPV6, _ip6_logger);
 }
 
 static void __net_exit nf_log_ipv6_net_exit(struct net *net)
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index aa5847a..30a17d6 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -39,12 +39,12 @@ static struct nf_logger *__find_logger(int pf, const char 
*str_logger)
return NULL;
 }
 
-void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger)
+int nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger)
 {
const struct nf_logger *log;
 
-   if (pf == NFPROTO_UNSPEC)
-   return;
+   if (pf == NFPROTO_UNSPEC || pf >= ARRAY_SIZE(net->nf.nf_loggers))
+   return -EOPNOTSUPP;
 
mutex_lock(_log_mutex);
log = nft_log_dereference(net->nf.nf_loggers[pf]);
@@ -52,6 +52,8 @@ void nf_log_set(struct net *net, u_int8_t pf, const struct 
nf_logger *logger)
rcu_assign_pointer(net->nf.nf_loggers[pf], logger);
 
mutex_unlock(_log_mutex);
+
+   return 0;
 }
 EXPORT_SYMBOL(nf_log_set);
 
-- 
1.9.1



[PATCH nf-next] netfilter: log: Check param to avoid overflow in nf_log_set

2016-08-28 Thread fgao
From: Gao Feng 

The nf_log_set is an interface function, so it should do a strict sanity
check of its parameters. Add one sanity check for pf: it must be less than
NFPROTO_NUMPROTO, and an error is logged when pf is invalid.

Signed-off-by: Gao Feng 
---
 net/netfilter/nf_log.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index aa5847a..02ce0b9 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -43,8 +43,10 @@ void nf_log_set(struct net *net, u_int8_t pf, const struct 
nf_logger *logger)
 {
const struct nf_logger *log;
 
-   if (pf == NFPROTO_UNSPEC)
+   if (pf == NFPROTO_UNSPEC || pf >= NFPROTO_NUMPROTO) {
+   pr_err("Wrong pf(%d) for nf log", pf);
return;
+   }
 
mutex_lock(_log_mutex);
log = nft_log_dereference(net->nf.nf_loggers[pf]);
-- 
1.9.1




[PATCH v1 1/1 net-next] 8139cp: Fix one possible deadloop in cp_rx_poll

2016-08-24 Thread fgao
From: Gao Feng 

When cp_rx_poll does not get enough packets, it checks the rx
interrupt status again. If an rx interrupt is pending, it jumps back to
rx_status_loop. But the goto jump also resets the rx variable to zero.

As a result, it can cause an endless loop. Assume this case:
rx_status_loop only gets a packet count which is less than budget,
and the (cpr16(IntrStatus) & cp_rx_intr_mask) condition is always true.
Then the loop never terminates and the system is blocked.

Signed-off-by: Gao Feng 
---
 drivers/net/ethernet/realtek/8139cp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/realtek/8139cp.c 
b/drivers/net/ethernet/realtek/8139cp.c
index deae10d..5297bf7 100644
--- a/drivers/net/ethernet/realtek/8139cp.c
+++ b/drivers/net/ethernet/realtek/8139cp.c
@@ -467,8 +467,8 @@ static int cp_rx_poll(struct napi_struct *napi, int budget)
unsigned int rx_tail = cp->rx_tail;
int rx;
 
-rx_status_loop:
rx = 0;
+rx_status_loop:
cpw16(IntrStatus, cp_rx_intr_mask);
 
while (rx < budget) {
-- 
1.9.1




[PATCH v2 1/2 net-next] netfilter: gre: Use consistent GRE_* macros instead of ones defined by netfilter.

2016-08-22 Thread fgao
From: Gao Feng 

There are already some GRE_* macros in the kernel, so it is unnecessary
to define these macros again. Also remove some useless macros.

Signed-off-by: Gao Feng 
---
 v2: Split the original patch to review easily
 v1: Initial patch

 include/linux/netfilter/nf_conntrack_proto_gre.h | 22 ++
 include/uapi/linux/if_tunnel.h   |  1 +
 net/ipv4/netfilter/nf_nat_proto_gre.c|  4 ++--
 net/netfilter/nf_conntrack_proto_gre.c   |  4 ++--
 4 files changed, 7 insertions(+), 24 deletions(-)

diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h 
b/include/linux/netfilter/nf_conntrack_proto_gre.h
index df78dc2..0189747 100644
--- a/include/linux/netfilter/nf_conntrack_proto_gre.h
+++ b/include/linux/netfilter/nf_conntrack_proto_gre.h
@@ -1,29 +1,11 @@
 #ifndef _CONNTRACK_PROTO_GRE_H
 #define _CONNTRACK_PROTO_GRE_H
 #include 
+#include 
+#include 
 
 /* GRE PROTOCOL HEADER */
 
-/* GRE Version field */
-#define GRE_VERSION_1701   0x0
-#define GRE_VERSION_PPTP   0x1
-
-/* GRE Protocol field */
-#define GRE_PROTOCOL_PPTP  0x880B
-
-/* GRE Flags */
-#define GRE_FLAG_C 0x80
-#define GRE_FLAG_R 0x40
-#define GRE_FLAG_K 0x20
-#define GRE_FLAG_S 0x10
-#define GRE_FLAG_A 0x80
-
-#define GRE_IS_C(f)((f)_FLAG_C)
-#define GRE_IS_R(f)((f)_FLAG_R)
-#define GRE_IS_K(f)((f)_FLAG_K)
-#define GRE_IS_S(f)((f)_FLAG_S)
-#define GRE_IS_A(f)((f)_FLAG_A)
-
 /* GRE is a mess: Four different standards */
 struct gre_hdr {
 #if defined(__LITTLE_ENDIAN_BITFIELD)
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 361b9f0..1b27e2c 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -36,6 +36,7 @@
 #define GRE_IS_REC(f)  ((f) & GRE_REC)
 #define GRE_IS_ACK(f)  ((f) & GRE_ACK)
 
+#define GRE_VERSION_0  __cpu_to_be16(0x)
 #define GRE_VERSION_1  __cpu_to_be16(0x0001)
 #define GRE_PROTO_PPP  __cpu_to_be16(0x880b)
 #define GRE_PPTP_KEY_MASK  __cpu_to_be32(0x)
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c 
b/net/ipv4/netfilter/nf_nat_proto_gre.c
index 9414923..93198d7 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -104,11 +104,11 @@ gre_manip_pkt(struct sk_buff *skb,
if (maniptype != NF_NAT_MANIP_DST)
return true;
switch (greh->version) {
-   case GRE_VERSION_1701:
+   case ntohs(GRE_VERSION_0):
/* We do not currently NAT any GREv0 packets.
 * Try to behave like "nf_nat_proto_unknown" */
break;
-   case GRE_VERSION_PPTP:
+   case ntohs(GRE_VERSION_1):
pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
pgreh->call_id = tuple->dst.u.gre.key;
break;
diff --git a/net/netfilter/nf_conntrack_proto_gre.c 
b/net/netfilter/nf_conntrack_proto_gre.c
index a96451a..deb239a 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -200,7 +200,7 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, 
unsigned int dataoff,
 
/* first only delinearize old RFC1701 GRE header */
grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr);
-   if (!grehdr || grehdr->version != GRE_VERSION_PPTP) {
+   if (!grehdr || grehdr->version != ntohs(GRE_VERSION_1)) {
/* try to behave like "nf_conntrack_proto_generic" */
tuple->src.u.all = 0;
tuple->dst.u.all = 0;
@@ -212,7 +212,7 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, 
unsigned int dataoff,
if (!pgrehdr)
return true;
 
-   if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) {
+   if (grehdr->protocol != GRE_PROTO_PPP) {
pr_debug("GRE_VERSION_PPTP but unknown proto\n");
return false;
}
-- 
1.9.1



[PATCH v2 2/2 net-next] netfilter: gre: Use consistent GRE and PPTP header structure instead of the ones defined by netfilter

2016-08-22 Thread fgao
From: Gao Feng 

There are two existing structures which define the GRE and PPTP headers.
So use these two structures instead of the ones defined by netfilter to
keep consistent with other code.

Signed-off-by: Gao Feng 
---
 v2: Split the original patch to review easily
 v1: Initial patch

 include/linux/netfilter/nf_conntrack_proto_gre.h | 42 
 net/ipv4/netfilter/nf_nat_proto_gre.c| 13 
 net/netfilter/nf_conntrack_proto_gre.c   | 12 +++
 3 files changed, 13 insertions(+), 54 deletions(-)

diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h 
b/include/linux/netfilter/nf_conntrack_proto_gre.h
index 0189747..dee0acd 100644
--- a/include/linux/netfilter/nf_conntrack_proto_gre.h
+++ b/include/linux/netfilter/nf_conntrack_proto_gre.h
@@ -4,48 +4,6 @@
 #include 
 #include 
 
-/* GRE PROTOCOL HEADER */
-
-/* GRE is a mess: Four different standards */
-struct gre_hdr {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-   __u16   rec:3,
-   srr:1,
-   seq:1,
-   key:1,
-   routing:1,
-   csum:1,
-   version:3,
-   reserved:4,
-   ack:1;
-#elif defined(__BIG_ENDIAN_BITFIELD)
-   __u16   csum:1,
-   routing:1,
-   key:1,
-   seq:1,
-   srr:1,
-   rec:3,
-   ack:1,
-   reserved:4,
-   version:3;
-#else
-#error "Adjust your  defines"
-#endif
-   __be16  protocol;
-};
-
-/* modified GRE header for PPTP */
-struct gre_hdr_pptp {
-   __u8   flags;   /* bitfield */
-   __u8   version; /* should be GRE_VERSION_PPTP */
-   __be16 protocol;/* should be GRE_PROTOCOL_PPTP */
-   __be16 payload_len; /* size of ppp payload, not inc. gre header */
-   __be16 call_id; /* peer's call_id for this session */
-   __be32 seq; /* sequence number.  Present if S==1 */
-   __be32 ack; /* seq number of highest packet received by */
-   /*  sender in this session */
-};
-
 struct nf_ct_gre {
unsigned int stream_timeout;
unsigned int timeout;
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c 
b/net/ipv4/netfilter/nf_nat_proto_gre.c
index 93198d7..edf0500 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -88,8 +88,8 @@ gre_manip_pkt(struct sk_buff *skb,
  const struct nf_conntrack_tuple *tuple,
  enum nf_nat_manip_type maniptype)
 {
-   const struct gre_hdr *greh;
-   struct gre_hdr_pptp *pgreh;
+   const struct gre_base_hdr *greh;
+   struct pptp_gre_header *pgreh;
 
/* pgreh includes two optional 32bit fields which are not required
 * to be there.  That's where the magic '8' comes from */
@@ -97,18 +97,19 @@ gre_manip_pkt(struct sk_buff *skb,
return false;
 
greh = (void *)skb->data + hdroff;
-   pgreh = (struct gre_hdr_pptp *)greh;
+   pgreh = (struct pptp_gre_header *)greh;
 
/* we only have destination manip of a packet, since 'source key'
 * is not present in the packet itself */
if (maniptype != NF_NAT_MANIP_DST)
return true;
-   switch (greh->version) {
-   case ntohs(GRE_VERSION_0):
+
+   switch (greh->flags & GRE_VERSION) {
+   case GRE_VERSION_0:
/* We do not currently NAT any GREv0 packets.
 * Try to behave like "nf_nat_proto_unknown" */
break;
-   case ntohs(GRE_VERSION_1):
+   case GRE_VERSION_1:
pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
pgreh->call_id = tuple->dst.u.gre.key;
break;
diff --git a/net/netfilter/nf_conntrack_proto_gre.c 
b/net/netfilter/nf_conntrack_proto_gre.c
index deb239a..9a715f8 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -192,15 +192,15 @@ static bool gre_invert_tuple(struct nf_conntrack_tuple 
*tuple,
 static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
 struct net *net, struct nf_conntrack_tuple *tuple)
 {
-   const struct gre_hdr_pptp *pgrehdr;
-   struct gre_hdr_pptp _pgrehdr;
+   const struct pptp_gre_header *pgrehdr;
+   struct pptp_gre_header _pgrehdr;
__be16 srckey;
-   const struct gre_hdr *grehdr;
-   struct gre_hdr _grehdr;
+   const struct gre_base_hdr *grehdr;
+   struct gre_base_hdr _grehdr;
 
/* first only delinearize old RFC1701 GRE header */
grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr);
-   if (!grehdr || grehdr->version != ntohs(GRE_VERSION_1)) {
+   if (!grehdr || (grehdr->flags & GRE_VERSION) != GRE_VERSION_1) {
/* try to behave like 

[PATCH v2 1/2 net-next] netfilter: gre: Use consistent GRE_* macros instead of ones defined by netfilter.

2016-08-22 Thread fgao
From: Gao Feng 

There are already some GRE_* macros in the kernel, so it is unnecessary
to define these macros again. Also remove some useless macros.

Signed-off-by: Gao Feng 
---
 v2: Split the original patch to review easily
 v1: Initial patch

 include/linux/netfilter/nf_conntrack_proto_gre.h | 22 ++
 include/uapi/linux/if_tunnel.h   |  1 +
 net/ipv4/netfilter/nf_nat_proto_gre.c|  4 ++--
 net/netfilter/nf_conntrack_proto_gre.c   |  4 ++--
 4 files changed, 7 insertions(+), 24 deletions(-)

diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h 
b/include/linux/netfilter/nf_conntrack_proto_gre.h
index df78dc2..0189747 100644
--- a/include/linux/netfilter/nf_conntrack_proto_gre.h
+++ b/include/linux/netfilter/nf_conntrack_proto_gre.h
@@ -1,29 +1,11 @@
 #ifndef _CONNTRACK_PROTO_GRE_H
 #define _CONNTRACK_PROTO_GRE_H
 #include 
+#include 
+#include 
 
 /* GRE PROTOCOL HEADER */
 
-/* GRE Version field */
-#define GRE_VERSION_1701   0x0
-#define GRE_VERSION_PPTP   0x1
-
-/* GRE Protocol field */
-#define GRE_PROTOCOL_PPTP  0x880B
-
-/* GRE Flags */
-#define GRE_FLAG_C 0x80
-#define GRE_FLAG_R 0x40
-#define GRE_FLAG_K 0x20
-#define GRE_FLAG_S 0x10
-#define GRE_FLAG_A 0x80
-
-#define GRE_IS_C(f)((f)_FLAG_C)
-#define GRE_IS_R(f)((f)_FLAG_R)
-#define GRE_IS_K(f)((f)_FLAG_K)
-#define GRE_IS_S(f)((f)_FLAG_S)
-#define GRE_IS_A(f)((f)_FLAG_A)
-
 /* GRE is a mess: Four different standards */
 struct gre_hdr {
 #if defined(__LITTLE_ENDIAN_BITFIELD)
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 361b9f0..1b27e2c 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -36,6 +36,7 @@
 #define GRE_IS_REC(f)  ((f) & GRE_REC)
 #define GRE_IS_ACK(f)  ((f) & GRE_ACK)
 
+#define GRE_VERSION_0  __cpu_to_be16(0x)
 #define GRE_VERSION_1  __cpu_to_be16(0x0001)
 #define GRE_PROTO_PPP  __cpu_to_be16(0x880b)
 #define GRE_PPTP_KEY_MASK  __cpu_to_be32(0x)
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c 
b/net/ipv4/netfilter/nf_nat_proto_gre.c
index 9414923..93198d7 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -104,11 +104,11 @@ gre_manip_pkt(struct sk_buff *skb,
if (maniptype != NF_NAT_MANIP_DST)
return true;
switch (greh->version) {
-   case GRE_VERSION_1701:
+   case ntohs(GRE_VERSION_0):
/* We do not currently NAT any GREv0 packets.
 * Try to behave like "nf_nat_proto_unknown" */
break;
-   case GRE_VERSION_PPTP:
+   case ntohs(GRE_VERSION_1):
pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
pgreh->call_id = tuple->dst.u.gre.key;
break;
diff --git a/net/netfilter/nf_conntrack_proto_gre.c 
b/net/netfilter/nf_conntrack_proto_gre.c
index a96451a..deb239a 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -200,7 +200,7 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, 
unsigned int dataoff,
 
/* first only delinearize old RFC1701 GRE header */
grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr);
-   if (!grehdr || grehdr->version != GRE_VERSION_PPTP) {
+   if (!grehdr || grehdr->version != ntohs(GRE_VERSION_1)) {
/* try to behave like "nf_conntrack_proto_generic" */
tuple->src.u.all = 0;
tuple->dst.u.all = 0;
@@ -212,7 +212,7 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, 
unsigned int dataoff,
if (!pgrehdr)
return true;
 
-   if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) {
+   if (grehdr->protocol != GRE_PROTO_PPP) {
pr_debug("GRE_VERSION_PPTP but unknown proto\n");
return false;
}
-- 
1.9.1



[PATCH v5 net-next] l2tp: Refactor the codes with existing macros instead of literal number

2016-08-22 Thread fgao
From: Gao Feng 

Use PPP_ALLSTATIONS, PPP_UI, and SEND_SHUTDOWN instead of 0xff,
0x03, and 2 respectively.

Signed-off-by: Gao Feng 
---
 v5: Remove trailing whitespace;
 v4: Remove two static ppph variables;
 v3: Modify the subject;
 v2: Only replace the literal numbers with macros according to Guillaume's advice
 v1: Initial patch

 net/l2tp/l2tp_ppp.c | 20 +---
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index d9560aa..0c071c4 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -177,7 +177,7 @@ static int pppol2tp_recv_payload_hook(struct sk_buff *skb)
if (!pskb_may_pull(skb, 2))
return 1;
 
-   if ((skb->data[0] == 0xff) && (skb->data[1] == 0x03))
+   if ((skb->data[0] == PPP_ALLSTATIONS) && (skb->data[1] == PPP_UI))
skb_pull(skb, 2);
 
return 0;
@@ -282,7 +282,6 @@ static void pppol2tp_session_sock_put(struct l2tp_session 
*session)
 static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
size_t total_len)
 {
-   static const unsigned char ppph[2] = { 0xff, 0x03 };
struct sock *sk = sock->sk;
struct sk_buff *skb;
int error;
@@ -312,7 +311,7 @@ static int pppol2tp_sendmsg(struct socket *sock, struct 
msghdr *m,
error = -ENOMEM;
skb = sock_wmalloc(sk, NET_SKB_PAD + sizeof(struct iphdr) +
   uhlen + session->hdr_len +
-  sizeof(ppph) + total_len,
+  2 + total_len, /* 2 bytes for PPP_ALLSTATIONS & 
PPP_UI */
   0, GFP_KERNEL);
if (!skb)
goto error_put_sess_tun;
@@ -325,8 +324,8 @@ static int pppol2tp_sendmsg(struct socket *sock, struct 
msghdr *m,
skb_reserve(skb, uhlen);
 
/* Add PPP header */
-   skb->data[0] = ppph[0];
-   skb->data[1] = ppph[1];
+   skb->data[0] = PPP_ALLSTATIONS;
+   skb->data[1] = PPP_UI;
skb_put(skb, 2);
 
/* Copy user data into skb */
@@ -369,7 +368,6 @@ error:
  */
 static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 {
-   static const u8 ppph[2] = { 0xff, 0x03 };
struct sock *sk = (struct sock *) chan->private;
struct sock *sk_tun;
struct l2tp_session *session;
@@ -398,14 +396,14 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct 
sk_buff *skb)
   sizeof(struct iphdr) + /* IP header */
   uhlen +  /* UDP header (if L2TP_ENCAPTYPE_UDP) */
   session->hdr_len +   /* L2TP header */
-  sizeof(ppph);/* PPP header */
+  2;   /* 2 bytes for PPP_ALLSTATIONS & PPP_UI 
*/
if (skb_cow_head(skb, headroom))
goto abort_put_sess_tun;
 
/* Setup PPP header */
-   __skb_push(skb, sizeof(ppph));
-   skb->data[0] = ppph[0];
-   skb->data[1] = ppph[1];
+   __skb_push(skb, 2);
+   skb->data[0] = PPP_ALLSTATIONS;
+   skb->data[1] = PPP_UI;
 
local_bh_disable();
l2tp_xmit_skb(session, skb, session->hdr_len);
@@ -440,7 +438,7 @@ static void pppol2tp_session_close(struct l2tp_session 
*session)
BUG_ON(session->magic != L2TP_SESSION_MAGIC);
 
if (sock) {
-   inet_shutdown(sock, 2);
+   inet_shutdown(sock, SEND_SHUTDOWN);
/* Don't let the session go away before our socket does */
l2tp_session_inc_refcount(session);
}
-- 
1.9.1



[PATCH v4 net-next] l2tp: Refactor the codes with existing macros instead of literal number

2016-08-22 Thread fgao
From: Gao Feng 

Use PPP_ALLSTATIONS, PPP_UI, and SEND_SHUTDOWN instead of 0xff,
0x03, and 2 respectively.

Signed-off-by: Gao Feng 
---
 v4: Remove two static ppph variables;
 v3: Modify the subject;
 v2: Only replace the literal number with macros according to Guillaume's advice
 v1: Initial patch

 net/l2tp/l2tp_ppp.c | 20 +---
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index d9560aa..0c071c4 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -177,7 +177,7 @@ static int pppol2tp_recv_payload_hook(struct sk_buff *skb)
if (!pskb_may_pull(skb, 2))
return 1;
 
-   if ((skb->data[0] == 0xff) && (skb->data[1] == 0x03))
+   if ((skb->data[0] == PPP_ALLSTATIONS) && (skb->data[1] == PPP_UI))
skb_pull(skb, 2);
 
return 0;
@@ -282,7 +282,6 @@ static void pppol2tp_session_sock_put(struct l2tp_session 
*session)
 static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
size_t total_len)
 {
-   static const unsigned char ppph[2] = { 0xff, 0x03 };
struct sock *sk = sock->sk;
struct sk_buff *skb;
int error;
@@ -312,7 +311,7 @@ static int pppol2tp_sendmsg(struct socket *sock, struct 
msghdr *m,
error = -ENOMEM;
skb = sock_wmalloc(sk, NET_SKB_PAD + sizeof(struct iphdr) +
   uhlen + session->hdr_len +
-  sizeof(ppph) + total_len,
+  2 + total_len, /* 2 bytes for PPP_ALLSTATIONS & 
PPP_UI */
   0, GFP_KERNEL);
if (!skb)
goto error_put_sess_tun;
@@ -325,8 +324,8 @@ static int pppol2tp_sendmsg(struct socket *sock, struct 
msghdr *m,
skb_reserve(skb, uhlen);
 
/* Add PPP header */
-   skb->data[0] = ppph[0];
-   skb->data[1] = ppph[1];
+   skb->data[0] = PPP_ALLSTATIONS; 
+   skb->data[1] = PPP_UI;
skb_put(skb, 2);
 
/* Copy user data into skb */
@@ -369,7 +368,6 @@ error:
  */
 static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 {
-   static const u8 ppph[2] = { 0xff, 0x03 };
struct sock *sk = (struct sock *) chan->private;
struct sock *sk_tun;
struct l2tp_session *session;
@@ -398,14 +396,14 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct 
sk_buff *skb)
   sizeof(struct iphdr) + /* IP header */
   uhlen +  /* UDP header (if L2TP_ENCAPTYPE_UDP) */
   session->hdr_len +   /* L2TP header */
-  sizeof(ppph);/* PPP header */
+  2;   /* 2 bytes for PPP_ALLSTATIONS & PPP_UI 
*/
if (skb_cow_head(skb, headroom))
goto abort_put_sess_tun;
 
/* Setup PPP header */
-   __skb_push(skb, sizeof(ppph));
-   skb->data[0] = ppph[0];
-   skb->data[1] = ppph[1];
+   __skb_push(skb, 2); 
+   skb->data[0] = PPP_ALLSTATIONS;
+   skb->data[1] = PPP_UI;
 
local_bh_disable();
l2tp_xmit_skb(session, skb, session->hdr_len);
@@ -440,7 +438,7 @@ static void pppol2tp_session_close(struct l2tp_session 
*session)
BUG_ON(session->magic != L2TP_SESSION_MAGIC);
 
if (sock) {
-   inet_shutdown(sock, 2);
+   inet_shutdown(sock, SEND_SHUTDOWN);
/* Don't let the session go away before our socket does */
l2tp_session_inc_refcount(session);
}
-- 
1.9.1



[PATCH v4 net-next] ppp: Fix one deadlock issue of PPP when reentrant

2016-08-21 Thread fgao
From: Gao Feng 

A PPP channel holds a spinlock while sending a frame. But with a wrong
route policy the skb may be routed back to the same PPP channel. As a
result, the skb reaches the same channel path and tries to take the same
spinlock which is already held. Bang, the deadlock occurs.

Now add a lock owner to avoid this, like the xmit_lock_owner of
netdev_queue. Check the lock owner before trying to take the spinlock.
If the current CPU is already the owner, it means PPP has detected a
reentrant send and returns directly. If it is not the owner and takes
the spinlock successfully, it sets the owner to the current CPU ID.

The following is the panic stack from 3.3.8. But the same issue
should exist upstream too.

[] ? _raw_spin_lock_bh+0x11/0x40
[] ppp_unregister_channel+0x1347/0x2170 [ppp_generic]
[] ? kmem_cache_free+0xa7/0xc0
[] ppp_unregister_channel+0x1db7/0x2170 [ppp_generic]
[] ppp_unregister_channel+0x2065/0x2170 [ppp_generic]
[] dev_hard_start_xmit+0x4cd/0x620
[] sch_direct_xmit+0x74/0x1d0
[] dev_queue_xmit+0x1d/0x30
[] neigh_direct_output+0xc/0x10
[] ip_finish_output+0x25e/0x2b0
[] ip_output+0x88/0x90
[] ? __ip_local_out+0x9f/0xb0
[] ip_local_out+0x24/0x30
[] 0xa00b9744
[] ppp_unregister_channel+0x20f8/0x2170 [ppp_generic]
[] ppp_output_wakeup+0x122/0x11d0 [ppp_generic]
[] vfs_write+0xb8/0x160
[] sys_write+0x45/0x90
[] system_call_fastpath+0x16/0x1b

The call flow is like this.
ppp_write->ppp_channel_push->start_xmit->select inappropriate route
 -> dev_hard_start_xmit->ppp_start_xmit->ppp_xmit_process->
ppp_push. Now ppp_push tries to get the same spinlock which is held
in ppp_channel_push.

Although the PPP deadlock is caused by an inappropriate route policy
with L2TP, I think it is not acceptable for the PPP module to deadlock
the kernel because of a wrong route policy.

Signed-off-by: Gao Feng 
---
 v4: 1) Remove the inline; 
 2) Add _ppp_channel_lock/unlock_bh for non-xmit path;
 3) Move local_bh_enable before print log;
 v3: Change the fix solution. Give up the send chance instead of taking the lock recursively
 v2: Fix recursive unlock issue
 v1: Initial patch

 drivers/net/ppp/ppp_generic.c | 104 +-
 1 file changed, 82 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 70cfa06..48a957a 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -162,6 +162,58 @@ struct ppp {
 |SC_MULTILINK|SC_MP_SHORTSEQ|SC_MP_XSHORTSEQ \
 |SC_COMP_TCP|SC_REJ_COMP_TCP|SC_MUST_COMP)
 
+struct channel_lock {
+   spinlock_t lock;
+   int owner;
+};
+
+static void ppp_channel_lock_init(struct channel_lock *cl)
+{
+   cl->owner = -1;
+   spin_lock_init(&cl->lock);
+}
+
+/* Used in non-xmit path */
+static void _ppp_channel_lock_bh(struct channel_lock *cl)
+{
+   spin_lock_bh(&cl->lock);
+}
+
+/* Used in non-xmit path */
+static void _ppp_channel_unlock_bh(struct channel_lock *cl)
+{
+   spin_unlock_bh(&cl->lock);
+}
+
+static bool ppp_channel_lock_bh(struct channel_lock *cl)
+{
+   int cpu;
+
+   local_bh_disable();
+   cpu = smp_processor_id();
+   if (cpu == cl->owner) {
+   local_bh_enable();
+   /* The CPU already holds this channel lock and sends. But the
+* channel is selected after inappropriate route. It causes
+* reenter the channel again. It is forbidden by PPP module.
+*/
+   if (net_ratelimit())
+   pr_err("PPP detects one recursive channel send\n");
+   return false;
+   }
+   spin_lock(&cl->lock);
+   cl->owner = cpu;
+
+   return true;
+}
+
+static void ppp_channel_unlock_bh(struct channel_lock *cl)
+{
+   cl->owner = -1;
+   spin_unlock(&cl->lock);
+   local_bh_enable();
+}
+
 /*
  * Private data structure for each channel.
  * This includes the data structure used for multilink.
@@ -171,7 +223,7 @@ struct channel {
struct list_head list;  /* link in all/new_channels list */
struct ppp_channel *chan;   /* public channel data structure */
struct rw_semaphore chan_sem;   /* protects `chan' during chan ioctl */
-   spinlock_t  downl;  /* protects `chan', file.xq dequeue */
+   struct channel_lock downl;  /* protects `chan', file.xq dequeue */
struct ppp  *ppp;   /* ppp unit we're connected to */
struct net  *chan_net;  /* the net channel belongs to */
struct list_head clist; /* link in list of channels per unit */
@@ -1587,9 +1639,7 @@ ppp_push(struct ppp *ppp)
list = &ppp->channels;
if (list_empty(list)) {
/* nowhere to send the packet, just drop it */
-   ppp->xmit_pending = NULL;
-   kfree_skb(skb);
-   return;
+   goto drop;
}
 
if ((ppp->flags & SC_MULTILINK) == 0) {
@@ 

[PATCH v3 net-next] l2tp: Refactor the codes with existing macros instead of literal number

2016-08-20 Thread fgao
From: Gao Feng 

Use PPP_ALLSTATIONS, PPP_UI, and SEND_SHUTDOWN instead of 0xff,
0x03, and 2 respectively.

Signed-off-by: Gao Feng 
---
 v3: Modify the subject;
 v2: Only replace the literal number with macros according to Guillaume's advice
 v1: Initial patch

 net/l2tp/l2tp_ppp.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index d9560aa..65e2fd6 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -177,7 +177,7 @@ static int pppol2tp_recv_payload_hook(struct sk_buff *skb)
if (!pskb_may_pull(skb, 2))
return 1;
 
-   if ((skb->data[0] == 0xff) && (skb->data[1] == 0x03))
+   if ((skb->data[0] == PPP_ALLSTATIONS) && (skb->data[1] == PPP_UI))
skb_pull(skb, 2);
 
return 0;
@@ -282,7 +282,7 @@ static void pppol2tp_session_sock_put(struct l2tp_session 
*session)
 static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
size_t total_len)
 {
-   static const unsigned char ppph[2] = { 0xff, 0x03 };
+   static const unsigned char ppph[2] = {PPP_ALLSTATIONS, PPP_UI};
struct sock *sk = sock->sk;
struct sk_buff *skb;
int error;
@@ -369,7 +369,7 @@ error:
  */
 static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 {
-   static const u8 ppph[2] = { 0xff, 0x03 };
+   static const u8 ppph[2] = {PPP_ALLSTATIONS, PPP_UI};
struct sock *sk = (struct sock *) chan->private;
struct sock *sk_tun;
struct l2tp_session *session;
@@ -440,7 +440,7 @@ static void pppol2tp_session_close(struct l2tp_session 
*session)
BUG_ON(session->magic != L2TP_SESSION_MAGIC);
 
if (sock) {
-   inet_shutdown(sock, 2);
+   inet_shutdown(sock, SEND_SHUTDOWN);
/* Don't let the session go away before our socket does */
l2tp_session_inc_refcount(session);
}
-- 
1.9.1




[PATCH 1/1] ppp: Fix one deadlock issue of PPP when reentrant

2016-08-19 Thread fgao
From: Gao Feng 

A PPP channel takes a spinlock before sending a frame. But with a wrong
route policy the skb may be routed back to the same PPP channel. As a
result, the skb re-enters the same channel path and tries to take the
spinlock that is already held. Bang, the deadlock occurs.

Now add a lock owner to avoid it, like xmit_lock_owner of
netdev_queue. Check the lock owner before trying to take the spinlock.
If the current CPU is already the owner, PPP has detected a reentrant
send and returns directly. Otherwise, once the spinlock is taken
successfully, the owner is set to the current CPU ID.

The following is the panic stack of 3.3.8. But the same issue
should be in the upstream too.

[] ? _raw_spin_lock_bh+0x11/0x40
[] ppp_unregister_channel+0x1347/0x2170 [ppp_generic]
[] ? kmem_cache_free+0xa7/0xc0
[] ppp_unregister_channel+0x1db7/0x2170 [ppp_generic]
[] ppp_unregister_channel+0x2065/0x2170 [ppp_generic]
[] dev_hard_start_xmit+0x4cd/0x620
[] sch_direct_xmit+0x74/0x1d0
[] dev_queue_xmit+0x1d/0x30
[] neigh_direct_output+0xc/0x10
[] ip_finish_output+0x25e/0x2b0
[] ip_output+0x88/0x90
[] ? __ip_local_out+0x9f/0xb0
[] ip_local_out+0x24/0x30
[] 0xa00b9744
[] ppp_unregister_channel+0x20f8/0x2170 [ppp_generic]
[] ppp_output_wakeup+0x122/0x11d0 [ppp_generic]
[] vfs_write+0xb8/0x160
[] sys_write+0x45/0x90
[] system_call_fastpath+0x16/0x1b

The call flow is like this.
ppp_write->ppp_channel_push->start_xmit->select inappropriate route
 -> dev_hard_start_xmit->ppp_start_xmit->ppp_xmit_process->
ppp_push. Now ppp_push tries to get the same spinlock which is held
in ppp_channel_push.

Although the PPP deadlock is caused by an inappropriate route policy
with L2TP, I think it is not acceptable that the PPP module causes a
kernel deadlock because of a wrong route policy.

Signed-off-by: Gao Feng 
---
 v3: Change the fix solution. Give up the send chance instead of recursive lock
 v2: Fix recursive unlock issue
 v1: Initial patch
 
 drivers/net/ppp/ppp_generic.c | 95 +--
 1 file changed, 73 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 70cfa06..b653f1f 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -162,6 +162,46 @@ struct ppp {
 |SC_MULTILINK|SC_MP_SHORTSEQ|SC_MP_XSHORTSEQ \
 |SC_COMP_TCP|SC_REJ_COMP_TCP|SC_MUST_COMP)
 
+struct channel_lock {
+   spinlock_t lock;
+   int owner;
+};
+
+static inline void ppp_channel_lock_init(struct channel_lock *cl)
+{
+   cl->owner = -1;
+   spin_lock_init(&cl->lock);
+}
+
+static inline bool ppp_channel_lock_bh(struct channel_lock *cl)
+{
+   int cpu;
+
+   local_bh_disable();
+   cpu = smp_processor_id();
+   if (cpu == cl->owner) {
+   /* The CPU already holds this channel lock and sends. But the
+* channel is selected after inappropriate route. It causes
+* reenter the channel again. It is forbidden by PPP module.
+*/
+   if (net_ratelimit())
+   pr_err("PPP detects one recursive channel send\n");
+   local_bh_enable();
+   return false;
+   }
+   spin_lock(&cl->lock);
+   cl->owner = cpu;
+
+   return true;
+}
+
+static inline void ppp_channel_unlock_bh(struct channel_lock *cl)
+{
+   cl->owner = -1;
+   spin_unlock(&cl->lock);
+   local_bh_enable();
+}
+
 /*
  * Private data structure for each channel.
  * This includes the data structure used for multilink.
@@ -171,7 +211,7 @@ struct channel {
struct list_head list;  /* link in all/new_channels list */
struct ppp_channel *chan;   /* public channel data structure */
struct rw_semaphore chan_sem;   /* protects `chan' during chan ioctl */
-   spinlock_t  downl;  /* protects `chan', file.xq dequeue */
+   struct channel_lock downl;  /* protects `chan', file.xq dequeue */
struct ppp  *ppp;   /* ppp unit we're connected to */
struct net  *chan_net;  /* the net channel belongs to */
struct list_head clist; /* link in list of channels per unit */
@@ -1587,9 +1627,7 @@ ppp_push(struct ppp *ppp)
list = &ppp->channels;
if (list_empty(list)) {
/* nowhere to send the packet, just drop it */
-   ppp->xmit_pending = NULL;
-   kfree_skb(skb);
-   return;
+   goto drop;
}
 
if ((ppp->flags & SC_MULTILINK) == 0) {
@@ -1597,16 +1635,19 @@ ppp_push(struct ppp *ppp)
list = list->next;
pch = list_entry(list, struct channel, clist);
 
-   spin_lock_bh(&pch->downl);
+   if (unlikely(!ppp_channel_lock_bh(&pch->downl))) {
+   /* Fail to hold channel lock */
+   goto drop;
+   }
  

[PATCH 1/1] netfilter: gre: Use the consistent GRE and PPTP struct instead of the structures defined in netfilter

2016-08-19 Thread fgao
From: Gao Feng 

There are already two structures which define the GRE header and the
PPTP header, so it is unnecessary to define duplicate structures in
netfilter again.

Signed-off-by: Gao Feng 
---
 v1: Initial patch

 include/linux/netfilter/nf_conntrack_proto_gre.h | 63 +---
 include/uapi/linux/if_tunnel.h   |  1 +
 net/ipv4/netfilter/nf_nat_proto_gre.c| 15 +++---
 net/netfilter/nf_conntrack_proto_gre.c   | 14 +++---
 4 files changed, 19 insertions(+), 74 deletions(-)

diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h 
b/include/linux/netfilter/nf_conntrack_proto_gre.h
index df78dc2..9c741da 100644
--- a/include/linux/netfilter/nf_conntrack_proto_gre.h
+++ b/include/linux/netfilter/nf_conntrack_proto_gre.h
@@ -2,67 +2,8 @@
 #define _CONNTRACK_PROTO_GRE_H
 #include 
 
-/* GRE PROTOCOL HEADER */
-
-/* GRE Version field */
-#define GRE_VERSION_1701   0x0
-#define GRE_VERSION_PPTP   0x1
-
-/* GRE Protocol field */
-#define GRE_PROTOCOL_PPTP  0x880B
-
-/* GRE Flags */
-#define GRE_FLAG_C 0x80
-#define GRE_FLAG_R 0x40
-#define GRE_FLAG_K 0x20
-#define GRE_FLAG_S 0x10
-#define GRE_FLAG_A 0x80
-
-#define GRE_IS_C(f)((f)&GRE_FLAG_C)
-#define GRE_IS_R(f)((f)&GRE_FLAG_R)
-#define GRE_IS_K(f)((f)&GRE_FLAG_K)
-#define GRE_IS_S(f)((f)&GRE_FLAG_S)
-#define GRE_IS_A(f)((f)&GRE_FLAG_A)
-
-/* GRE is a mess: Four different standards */
-struct gre_hdr {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-   __u16   rec:3,
-   srr:1,
-   seq:1,
-   key:1,
-   routing:1,
-   csum:1,
-   version:3,
-   reserved:4,
-   ack:1;
-#elif defined(__BIG_ENDIAN_BITFIELD)
-   __u16   csum:1,
-   routing:1,
-   key:1,
-   seq:1,
-   srr:1,
-   rec:3,
-   ack:1,
-   reserved:4,
-   version:3;
-#else
-#error "Adjust your <asm/byteorder.h> defines"
-#endif
-   __be16  protocol;
-};
-
-/* modified GRE header for PPTP */
-struct gre_hdr_pptp {
-   __u8   flags;   /* bitfield */
-   __u8   version; /* should be GRE_VERSION_PPTP */
-   __be16 protocol;/* should be GRE_PROTOCOL_PPTP */
-   __be16 payload_len; /* size of ppp payload, not inc. gre header */
-   __be16 call_id; /* peer's call_id for this session */
-   __be32 seq; /* sequence number.  Present if S==1 */
-   __be32 ack; /* seq number of highest packet received by */
-   /*  sender in this session */
-};
+#include 
+#include 
 
 struct nf_ct_gre {
unsigned int stream_timeout;
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 361b9f0..1b27e2c 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -36,6 +36,7 @@
 #define GRE_IS_REC(f)  ((f) & GRE_REC)
 #define GRE_IS_ACK(f)  ((f) & GRE_ACK)
 
+#define GRE_VERSION_0  __cpu_to_be16(0x)
 #define GRE_VERSION_1  __cpu_to_be16(0x0001)
 #define GRE_PROTO_PPP  __cpu_to_be16(0x880b)
 #define GRE_PPTP_KEY_MASK  __cpu_to_be32(0x)
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c 
b/net/ipv4/netfilter/nf_nat_proto_gre.c
index 9414923..afe81a8 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -88,8 +88,9 @@ gre_manip_pkt(struct sk_buff *skb,
  const struct nf_conntrack_tuple *tuple,
  enum nf_nat_manip_type maniptype)
 {
-   const struct gre_hdr *greh;
-   struct gre_hdr_pptp *pgreh;
+   const struct gre_base_hdr *greh;
+   struct pptp_gre_header *pgreh;
+   u16 gre_ver;
 
/* pgreh includes two optional 32bit fields which are not required
 * to be there.  That's where the magic '8' comes from */
@@ -97,18 +98,20 @@ gre_manip_pkt(struct sk_buff *skb,
return false;
 
greh = (void *)skb->data + hdroff;
-   pgreh = (struct gre_hdr_pptp *)greh;
+   pgreh = (struct pptp_gre_header *)greh;
 
/* we only have destination manip of a packet, since 'source key'
 * is not present in the packet itself */
if (maniptype != NF_NAT_MANIP_DST)
return true;
-   switch (greh->version) {
-   case GRE_VERSION_1701:
+
+   gre_ver = ntohs(greh->flags & GRE_VERSION);
+   switch (gre_ver) {
+   case GRE_VERSION_0:
/* We do not currently NAT any GREv0 packets.
 * Try to behave like "nf_nat_proto_unknown" */
break;
-   case GRE_VERSION_PPTP:
+   case GRE_VERSION_1:
pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
pgreh->call_id = tuple->dst.u.gre.key;
break;
diff --git 

[PATCH v1 1/1] l2tp: Use existing macros instead of literal number

2016-08-19 Thread fgao
From: Gao Feng 

1. Use PPP_ALLSTATIONS/PPP_UI instead of literal 0xff/0x03;
2. Use one static const global fixed_ppphdr instead of two same
static variable ppph in two different functions;
3. Use SEND_SHUTDOWN instead of literal 2;

Signed-off-by: Gao Feng 
---
 v1: Initial patch

 net/l2tp/l2tp_ppp.c | 28 ++--
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index d9560aa..0e69eb4 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -138,6 +138,8 @@ static const struct ppp_channel_ops pppol2tp_chan_ops = {
 
 static const struct proto_ops pppol2tp_ops;
 
+static const unsigned char fixed_ppphdr[2] = {PPP_ALLSTATIONS, PPP_UI};
+
 /* Helpers to obtain tunnel/session contexts from sockets.
  */
 static inline struct l2tp_session *pppol2tp_sock_to_session(struct sock *sk)
@@ -174,11 +176,11 @@ static int pppol2tp_recv_payload_hook(struct sk_buff *skb)
 * Note that skb->data[] isn't dereferenced from a u16 ptr here since
 * the field may be unaligned.
 */
-   if (!pskb_may_pull(skb, 2))
+   if (!pskb_may_pull(skb, sizeof(fixed_ppphdr)))
return 1;
 
-   if ((skb->data[0] == 0xff) && (skb->data[1] == 0x03))
-   skb_pull(skb, 2);
+   if ((PPP_ADDRESS(skb->data) == PPP_ALLSTATIONS) && 
(PPP_CONTROL(skb->data) == PPP_UI))
+   skb_pull(skb, sizeof(fixed_ppphdr));
 
return 0;
 }
@@ -282,7 +284,6 @@ static void pppol2tp_session_sock_put(struct l2tp_session 
*session)
 static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
size_t total_len)
 {
-   static const unsigned char ppph[2] = { 0xff, 0x03 };
struct sock *sk = sock->sk;
struct sk_buff *skb;
int error;
@@ -312,7 +313,7 @@ static int pppol2tp_sendmsg(struct socket *sock, struct 
msghdr *m,
error = -ENOMEM;
skb = sock_wmalloc(sk, NET_SKB_PAD + sizeof(struct iphdr) +
   uhlen + session->hdr_len +
-  sizeof(ppph) + total_len,
+  sizeof(fixed_ppphdr) + total_len,
   0, GFP_KERNEL);
if (!skb)
goto error_put_sess_tun;
@@ -325,9 +326,9 @@ static int pppol2tp_sendmsg(struct socket *sock, struct 
msghdr *m,
skb_reserve(skb, uhlen);
 
/* Add PPP header */
-   skb->data[0] = ppph[0];
-   skb->data[1] = ppph[1];
-   skb_put(skb, 2);
+   PPP_ADDRESS(skb->data) = fixed_ppphdr[0];
+   PPP_CONTROL(skb->data) = fixed_ppphdr[1];
+   skb_put(skb, sizeof(fixed_ppphdr));
 
/* Copy user data into skb */
error = memcpy_from_msg(skb_put(skb, total_len), m, total_len);
@@ -369,7 +370,6 @@ error:
  */
 static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 {
-   static const u8 ppph[2] = { 0xff, 0x03 };
struct sock *sk = (struct sock *) chan->private;
struct sock *sk_tun;
struct l2tp_session *session;
@@ -398,14 +398,14 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct 
sk_buff *skb)
   sizeof(struct iphdr) + /* IP header */
   uhlen +  /* UDP header (if L2TP_ENCAPTYPE_UDP) */
   session->hdr_len +   /* L2TP header */
-  sizeof(ppph);/* PPP header */
+  sizeof(fixed_ppphdr); /* PPP header */
if (skb_cow_head(skb, headroom))
goto abort_put_sess_tun;
 
/* Setup PPP header */
-   __skb_push(skb, sizeof(ppph));
-   skb->data[0] = ppph[0];
-   skb->data[1] = ppph[1];
+   __skb_push(skb, sizeof(fixed_ppphdr));
+   skb->data[0] = fixed_ppphdr[0];
+   skb->data[1] = fixed_ppphdr[1];
 
local_bh_disable();
l2tp_xmit_skb(session, skb, session->hdr_len);
@@ -440,7 +440,7 @@ static void pppol2tp_session_close(struct l2tp_session 
*session)
BUG_ON(session->magic != L2TP_SESSION_MAGIC);
 
if (sock) {
-   inet_shutdown(sock, 2);
+   inet_shutdown(sock, SEND_SHUTDOWN);
/* Don't let the session go away before our socket does */
l2tp_session_inc_refcount(session);
}
-- 
1.9.1



[PATCH 1/1] l2tp: Fix the connect status check in pppol2tp_getname

2016-08-18 Thread fgao
From: Gao Feng 

sk->sk_state is a set of bit flags, so it needs to be checked with a
bit operation instead of a value comparison.

Signed-off-by: Gao Feng 
---
 net/l2tp/l2tp_ppp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index d9560aa..232cb92 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -856,7 +856,7 @@ static int pppol2tp_getname(struct socket *sock, struct 
sockaddr *uaddr,
error = -ENOTCONN;
if (sk == NULL)
goto end;
-   if (sk->sk_state != PPPOX_CONNECTED)
+   if (!(sk->sk_state & PPPOX_CONNECTED))
goto end;
 
error = -EBADF;
-- 
1.9.1




[PATCH v1 1/1] l2tp: Use existing macros instead of literal number

2016-08-18 Thread fgao
From: Gao Feng 

1. Use PPP_ALLSTATIONS/PPP_UI instead of literal 0xff/0x03;
2. Use one static const global fixed_ppphdr instead of two same
static variable ppph in two different functions;
3. Use SEND_SHUTDOWN instead of literal 2;

Signed-off-by: Gao Feng 
---
 v1: Initial patch

 net/l2tp/l2tp_ppp.c | 28 ++--
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index d9560aa..0e69eb4 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -138,6 +138,8 @@ static const struct ppp_channel_ops pppol2tp_chan_ops = {
 
 static const struct proto_ops pppol2tp_ops;
 
+static const unsigned char fixed_ppphdr[2] = {PPP_ALLSTATIONS, PPP_UI};
+
 /* Helpers to obtain tunnel/session contexts from sockets.
  */
 static inline struct l2tp_session *pppol2tp_sock_to_session(struct sock *sk)
@@ -174,11 +176,11 @@ static int pppol2tp_recv_payload_hook(struct sk_buff *skb)
 * Note that skb->data[] isn't dereferenced from a u16 ptr here since
 * the field may be unaligned.
 */
-   if (!pskb_may_pull(skb, 2))
+   if (!pskb_may_pull(skb, sizeof(fixed_ppphdr)))
return 1;
 
-   if ((skb->data[0] == 0xff) && (skb->data[1] == 0x03))
-   skb_pull(skb, 2);
+   if ((PPP_ADDRESS(skb->data) == PPP_ALLSTATIONS) && 
(PPP_CONTROL(skb->data) == PPP_UI))
+   skb_pull(skb, sizeof(fixed_ppphdr));
 
return 0;
 }
@@ -282,7 +284,6 @@ static void pppol2tp_session_sock_put(struct l2tp_session 
*session)
 static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
size_t total_len)
 {
-   static const unsigned char ppph[2] = { 0xff, 0x03 };
struct sock *sk = sock->sk;
struct sk_buff *skb;
int error;
@@ -312,7 +313,7 @@ static int pppol2tp_sendmsg(struct socket *sock, struct 
msghdr *m,
error = -ENOMEM;
skb = sock_wmalloc(sk, NET_SKB_PAD + sizeof(struct iphdr) +
   uhlen + session->hdr_len +
-  sizeof(ppph) + total_len,
+  sizeof(fixed_ppphdr) + total_len,
   0, GFP_KERNEL);
if (!skb)
goto error_put_sess_tun;
@@ -325,9 +326,9 @@ static int pppol2tp_sendmsg(struct socket *sock, struct 
msghdr *m,
skb_reserve(skb, uhlen);
 
/* Add PPP header */
-   skb->data[0] = ppph[0];
-   skb->data[1] = ppph[1];
-   skb_put(skb, 2);
+   PPP_ADDRESS(skb->data) = fixed_ppphdr[0];
+   PPP_CONTROL(skb->data) = fixed_ppphdr[1];
+   skb_put(skb, sizeof(fixed_ppphdr));
 
/* Copy user data into skb */
error = memcpy_from_msg(skb_put(skb, total_len), m, total_len);
@@ -369,7 +370,6 @@ error:
  */
 static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 {
-   static const u8 ppph[2] = { 0xff, 0x03 };
struct sock *sk = (struct sock *) chan->private;
struct sock *sk_tun;
struct l2tp_session *session;
@@ -398,14 +398,14 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct 
sk_buff *skb)
   sizeof(struct iphdr) + /* IP header */
   uhlen +  /* UDP header (if L2TP_ENCAPTYPE_UDP) */
   session->hdr_len +   /* L2TP header */
-  sizeof(ppph);/* PPP header */
+  sizeof(fixed_ppphdr); /* PPP header */
if (skb_cow_head(skb, headroom))
goto abort_put_sess_tun;
 
/* Setup PPP header */
-   __skb_push(skb, sizeof(ppph));
-   skb->data[0] = ppph[0];
-   skb->data[1] = ppph[1];
+   __skb_push(skb, sizeof(fixed_ppphdr));
+   skb->data[0] = fixed_ppphdr[0];
+   skb->data[1] = fixed_ppphdr[1];
 
local_bh_disable();
l2tp_xmit_skb(session, skb, session->hdr_len);
@@ -440,7 +440,7 @@ static void pppol2tp_session_close(struct l2tp_session 
*session)
BUG_ON(session->magic != L2TP_SESSION_MAGIC);
 
if (sock) {
-   inet_shutdown(sock, 2);
+   inet_shutdown(sock, SEND_SHUTDOWN);
/* Don't let the session go away before our socket does */
l2tp_session_inc_refcount(session);
}
-- 
1.9.1



[PATCH 2/2] pptp: Reset call_id as 0 to avoid one useless lookup at next time

2016-08-17 Thread fgao
From: Gao Feng 

When pptp fails to get a valid callid, the global call_id is left as
MAX_CALLID. The next find_next_zero_bit call, which starts from
call_id+1, is then bound to fail, because call_id+1 exceeds the limit
"MAX_CALLID".
So reset call_id to 0 when no valid callid can be found, and add one
variable to check whether the second lookup is needed.

Signed-off-by: Gao Feng 
---
 v1: Initial patch

 drivers/net/ppp/pptp.c | 19 +++
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index 48c3701..9afef3c 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -103,12 +103,21 @@ static int add_chan(struct pppox_sock *sock,
static int call_id;
 
spin_lock(&chan_lock);
-   if (!sa->call_id)   {
+   if (!sa->call_id) {
+   bool from_start = (call_id == 0);
+
call_id = find_next_zero_bit(callid_bitmap, MAX_CALLID, call_id 
+ 1);
if (call_id == MAX_CALLID) {
-   call_id = find_next_zero_bit(callid_bitmap, MAX_CALLID, 
1);
-   if (call_id == MAX_CALLID)
+   if (unlikely(from_start)) {
+   call_id = 0;
goto out_err;
+   } else {
+   call_id = find_next_zero_bit(callid_bitmap, 
MAX_CALLID, 1);
+   if (call_id == MAX_CALLID) {
+   call_id = 0;
+   goto out_err;
+   }
+   }
}
sa->call_id = call_id;
} else if (test_bit(sa->call_id, callid_bitmap)) {
@@ -656,8 +665,10 @@ static int __init pptp_init_module(void)
pr_info("PPTP driver version " PPTP_DRIVER_VERSION "\n");
 
callid_sock = vzalloc((MAX_CALLID + 1) * sizeof(void *));
-   if (!callid_sock)
+   if (!callid_sock) {
+   pr_err("PPTP: can't alloc callid_sock mem");
return -ENOMEM;
+   }
 
err = gre_add_protocol(&gre_pptp_protocol, GREPROTO_PPTP);
if (err) {
-- 
1.9.1




[PATCH v1 1/2] pptp: Use macro and sizeof instead of literal number

2016-08-17 Thread fgao
From: Gao Feng 

Use existing macros like PPP_ADDRESS and SC_COMP_PROT, and sizeof on
fixed variables, instead of the original literal numbers to enhance
readability.

BTW, the original pptp_rcv uses the literal number "12" as the parameter
of pskb_may_pull. Actually "12" is less than the size of struct
pptp_gre_header. Using sizeof(*header) instead fixes this issue.

Signed-off-by: Gao Feng 
---
 v1: Initial patch

 drivers/net/ppp/pptp.c | 23 ---
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index 1951b10..48c3701 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -54,6 +54,8 @@ static struct proto pptp_sk_proto __read_mostly;
 static const struct ppp_channel_ops pptp_chan_ops;
 static const struct proto_ops pptp_ops;
 
+static const u8 fixed_ppphdr[2] = {PPP_ALLSTATIONS, PPP_UI};
+
 static struct pppox_sock *lookup_chan(u16 call_id, __be32 s_addr)
 {
struct pppox_sock *sock;
@@ -167,7 +169,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct 
sk_buff *skb)
 
tdev = rt->dst.dev;
 
-   max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(*iph) + sizeof(*hdr) + 
2;
+   max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(*iph) + sizeof(*hdr) + 
sizeof(fixed_ppphdr);
 
if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || 
skb_shared(skb)) {
struct sk_buff *new_skb = skb_realloc_headroom(skb, 
max_headroom);
@@ -190,9 +192,9 @@ static int pptp_xmit(struct ppp_channel *chan, struct 
sk_buff *skb)
 
/* Put in the address/control bytes if necessary */
if ((opt->ppp_flags & SC_COMP_AC) == 0 || islcp) {
-   data = skb_push(skb, 2);
-   data[0] = PPP_ALLSTATIONS;
-   data[1] = PPP_UI;
+   data = skb_push(skb, sizeof(fixed_ppphdr));
+   data[0] = fixed_ppphdr[0];
+   data[1] = fixed_ppphdr[1];
}
 
len = skb->len;
@@ -219,8 +221,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct 
sk_buff *skb)
}
hdr->payload_len = htons(len);
 
-   /*  Push down and install the IP header. */
-
+   /* Push down and install the IP header. */
skb_reset_transport_header(skb);
skb_push(skb, sizeof(*iph));
skb_reset_network_header(skb);
@@ -319,14 +320,14 @@ static int pptp_rcv_core(struct sock *sk, struct sk_buff 
*skb)
 allow_packet:
skb_pull(skb, headersize);
 
-   if (payload[0] == PPP_ALLSTATIONS && payload[1] == PPP_UI) {
+   if (PPP_ADDRESS(payload) == PPP_ALLSTATIONS && 
PPP_CONTROL(payload) == PPP_UI) {
/* chop off address/control */
if (skb->len < 3)
goto drop;
-   skb_pull(skb, 2);
+   skb_pull(skb, sizeof(fixed_ppphdr));
}
 
-   if ((*skb->data) & 1) {
+   if ((*skb->data) & SC_COMP_PROT) {
/* protocol is compressed */
skb_push(skb, 1)[0] = 0;
}
@@ -351,7 +352,7 @@ static int pptp_rcv(struct sk_buff *skb)
if (skb->pkt_type != PACKET_HOST)
goto drop;
 
-   if (!pskb_may_pull(skb, 12))
+   if (!pskb_may_pull(skb, sizeof(*header)))
goto drop;
 
iph = ip_hdr(skb);
@@ -468,7 +469,7 @@ static int pptp_connect(struct socket *sock, struct 
sockaddr *uservaddr,
ip_rt_put(rt);
po->chan.mtu -= PPTP_HEADER_OVERHEAD;
 
-   po->chan.hdrlen = 2 + sizeof(struct pptp_gre_header);
+   po->chan.hdrlen = sizeof(fixed_ppphdr) + sizeof(struct pptp_gre_header);
error = ppp_register_channel(&po->chan);
if (error) {
pr_err("PPTP: failed to register PPP channel (%d)\n", error);
-- 
1.9.1



[PATCH v1 1/1] pppoe: l2tp: the PPPOX_CONNECTED should be used with bit operation

2016-08-17 Thread fgao
From: Gao Feng 

Some code in pppoe and l2tp uses PPPOX_CONNECTED as a plain value, in
both assignments and condition checks.
It should be kept consistent with the other code, which treats it as a
bit flag.

Signed-off-by: Gao Feng 
---
 v1: Initial Patch

 drivers/net/ppp/pppoe.c | 2 +-
 net/l2tp/l2tp_ppp.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 4ddae81..684b773 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -697,7 +697,7 @@ static int pppoe_connect(struct socket *sock, struct 
sockaddr *uservaddr,
goto err_put;
}
 
-   sk->sk_state = PPPOX_CONNECTED;
+   sk->sk_state |= PPPOX_CONNECTED;
}
 
po->num = sp->sa_addr.pppoe.sid;
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index d9560aa..3984385 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -774,7 +774,7 @@ static int pppol2tp_connect(struct socket *sock, struct 
sockaddr *uservaddr,
 out_no_ppp:
/* This is how we get the session context from the socket. */
sk->sk_user_data = session;
-   sk->sk_state = PPPOX_CONNECTED;
+   sk->sk_state |= PPPOX_CONNECTED;
l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: created\n",
  session->name);
 
@@ -856,7 +856,7 @@ static int pppol2tp_getname(struct socket *sock, struct 
sockaddr *uaddr,
error = -ENOTCONN;
if (sk == NULL)
goto end;
-   if (sk->sk_state != PPPOX_CONNECTED)
+   if (!(sk->sk_state & PPPOX_CONNECTED))
goto end;
 
error = -EBADF;
-- 
1.9.1




[PATCH v2 1/1] ppp: Fix one deadlock issue of PPP when send frame

2016-08-16 Thread fgao
From: Gao Feng 

A PPP channel takes a spinlock before sending a frame. But with a wrong
route policy the skb may be routed back to the same PPP channel. As a
result, the skb re-enters the same channel path and tries to take the
spinlock that is already held. Bang, the deadlock occurs.

Now add a lock owner to avoid it, like xmit_lock_owner of
netdev_queue. Check the lock owner before trying to take the spinlock.
If the current CPU is already the owner, there is no need to lock again.
When the PPP channel takes the spinlock for the first time, it sets the
owner to the current CPU ID.

The following is the panic stack of 3.3.8. But the same issue
should be in the upstream too.

[] ? _raw_spin_lock_bh+0x11/0x40
[] ppp_unregister_channel+0x1347/0x2170 [ppp_generic]
[] ? kmem_cache_free+0xa7/0xc0
[] ppp_unregister_channel+0x1db7/0x2170 [ppp_generic]
[] ppp_unregister_channel+0x2065/0x2170 [ppp_generic]
[] dev_hard_start_xmit+0x4cd/0x620
[] sch_direct_xmit+0x74/0x1d0
[] dev_queue_xmit+0x1d/0x30
[] neigh_direct_output+0xc/0x10
[] ip_finish_output+0x25e/0x2b0
[] ip_output+0x88/0x90
[] ? __ip_local_out+0x9f/0xb0
[] ip_local_out+0x24/0x30
[] 0xa00b9744
[] ppp_unregister_channel+0x20f8/0x2170 [ppp_generic]
[] ppp_output_wakeup+0x122/0x11d0 [ppp_generic]
[] vfs_write+0xb8/0x160
[] sys_write+0x45/0x90
[] system_call_fastpath+0x16/0x1b

The call flow is like this.
ppp_write->ppp_channel_push->start_xmit->select inappropriate route
 -> dev_hard_start_xmit->ppp_start_xmit->ppp_xmit_process->
ppp_push. Now ppp_push tries to get the same spinlock which is held
in ppp_channel_push.

Although the PPP deadlock is caused by an inappropriate route policy
with L2TP, I think it is not acceptable that the PPP module causes a
kernel deadlock because of a wrong route policy.

Signed-off-by: Gao Feng 
---
 v2: Add lock_cnt to avoid unlocking multiple times on recursive lock
 v1: Initial patch
 drivers/net/ppp/ppp_generic.c | 57 +--
 1 file changed, 44 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 70cfa06..6909ab1 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -162,6 +162,37 @@ struct ppp {
 |SC_MULTILINK|SC_MP_SHORTSEQ|SC_MP_XSHORTSEQ \
 |SC_COMP_TCP|SC_REJ_COMP_TCP|SC_MUST_COMP)
 
+struct chennel_lock {
+   spinlock_t lock;
+   u32 owner;
+   u32 lock_cnt;
+};
+
+#define PPP_CHANNEL_LOCK_INIT(cl) \
+   cl.owner = -1; \
+   cl.lock_cnt = 0; \
+   spin_lock_init(&cl.lock)
+
+#define PPP_CHANNEL_LOCK_BH(cl) \
+   do { \
+   local_bh_disable(); \
+   if (cl.owner != smp_processor_id()) { \
+   spin_lock(&cl.lock); \
+   cl.owner = smp_processor_id(); \
+   } \
+   cl.lock_cnt++; \
+   } while (0)
+
+#define PPP_CHANNEL_UNLOCK_BH(cl) \
+   do { \
+   cl.lock_cnt--; \
+   if (cl.lock_cnt == 0) { \
+   cl.owner = -1; \
+   spin_unlock(&cl.lock); \
+   } \
+   local_bh_enable(); \
+   } while (0)
+
 /*
  * Private data structure for each channel.
  * This includes the data structure used for multilink.
@@ -171,7 +202,7 @@ struct channel {
struct list_head list;  /* link in all/new_channels list */
struct ppp_channel *chan;   /* public channel data structure */
struct rw_semaphore chan_sem;   /* protects `chan' during chan ioctl */
-   spinlock_t  downl;  /* protects `chan', file.xq dequeue */
+   struct chennel_lock downl;  /* protects `chan', file.xq dequeue */
struct ppp  *ppp;   /* ppp unit we're connected to */
struct net  *chan_net;  /* the net channel belongs to */
struct list_head clist; /* link in list of channels per unit */
@@ -1597,7 +1628,7 @@ ppp_push(struct ppp *ppp)
list = list->next;
pch = list_entry(list, struct channel, clist);
 
-   spin_lock_bh(&pch->downl);
+   PPP_CHANNEL_LOCK_BH(pch->downl);
if (pch->chan) {
if (pch->chan->ops->start_xmit(pch->chan, skb))
ppp->xmit_pending = NULL;
@@ -1606,7 +1637,7 @@ ppp_push(struct ppp *ppp)
kfree_skb(skb);
ppp->xmit_pending = NULL;
}
-   spin_unlock_bh(&pch->downl);
+   PPP_CHANNEL_UNLOCK_BH(pch->downl);
return;
}
 
@@ -1736,7 +1767,7 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff 
*skb)
}
 
/* check the channel's mtu and whether it is still attached. */
-   spin_lock_bh(>downl);
+   PPP_CHANNEL_LOCK_BH(pch->downl);
if (pch->chan == NULL) {
/* can't use this channel, 

[PATCH 1/1] ppp: Fix one deadlock issue of PPP when send frame

2016-08-16 Thread fgao
From: Gao Feng 

A PPP channel takes a spinlock before sending a frame. But with a wrong
route policy, the skb may be routed back to the same PPP channel. As a
result, the skb re-enters the same channel path and tries to take the
spinlock that is already held, so the CPU deadlocks.

Now add a lock owner to avoid this, like the xmit_lock_owner of
netdev_queue. Check the lock owner before trying to take the spinlock.
If the current CPU is already the owner, there is no need to lock again.
When the PPP channel takes the spinlock for the first time, it sets the
owner to the current CPU ID.

The following is the panic stack from 3.3.8, but the same issue
should exist upstream too.

[] ? _raw_spin_lock_bh+0x11/0x40
[] ppp_unregister_channel+0x1347/0x2170 [ppp_generic]
[] ? kmem_cache_free+0xa7/0xc0
[] ppp_unregister_channel+0x1db7/0x2170 [ppp_generic]
[] ppp_unregister_channel+0x2065/0x2170 [ppp_generic]
[] dev_hard_start_xmit+0x4cd/0x620
[] sch_direct_xmit+0x74/0x1d0
[] dev_queue_xmit+0x1d/0x30
[] neigh_direct_output+0xc/0x10
[] ip_finish_output+0x25e/0x2b0
[] ip_output+0x88/0x90
[] ? __ip_local_out+0x9f/0xb0
[] ip_local_out+0x24/0x30
[] 0xa00b9744
[] ppp_unregister_channel+0x20f8/0x2170 [ppp_generic]
[] ppp_output_wakeup+0x122/0x11d0 [ppp_generic]
[] vfs_write+0xb8/0x160
[] sys_write+0x45/0x90
[] system_call_fastpath+0x16/0x1b

The call flow is as follows:
ppp_write->ppp_channel_push->start_xmit->select inappropriate route
 -> dev_hard_start_xmit->ppp_start_xmit->ppp_xmit_process->
ppp_push. Now ppp_push tries to take the same spinlock that is already
held by ppp_channel_push.

Although this PPP deadlock is triggered by an inappropriate route policy
with L2TP, I think it is not acceptable for the PPP module to deadlock
the kernel because of a wrong route policy.

Signed-off-by: Gao Feng 
---
 v1: Initial Patch

 drivers/net/ppp/ppp_generic.c | 49 +++
 1 file changed, 36 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 70cfa06..ffd0233 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -162,6 +162,29 @@ struct ppp {
 |SC_MULTILINK|SC_MP_SHORTSEQ|SC_MP_XSHORTSEQ \
 |SC_COMP_TCP|SC_REJ_COMP_TCP|SC_MUST_COMP)
 
+struct chennel_lock {
+   spinlock_t lock;
+   u32 owner;
+};
+
+#define PPP_CHANNEL_LOCK_INIT(cl) \
+   cl.owner = -1; \
+   spin_lock_init()
+
+#define PPP_CHANNEL_LOCK_BH(cl) \
+   do { \
+   local_bh_disable(); \
+   if (cl.owner != smp_processor_id()) { \
+   spin_lock(); \
+   cl.owner = smp_processor_id(); \
+   } \
+   } while (0)
+
+#define PPP_CHANNEL_UNLOCK_BH(cl) \
+   cl.owner = -1; \
+   spin_unlock(); \
+   local_bh_enable()
+
 /*
  * Private data structure for each channel.
  * This includes the data structure used for multilink.
@@ -171,7 +194,7 @@ struct channel {
struct list_head list;  /* link in all/new_channels list */
struct ppp_channel *chan;   /* public channel data structure */
struct rw_semaphore chan_sem;   /* protects `chan' during chan ioctl */
-   spinlock_t  downl;  /* protects `chan', file.xq dequeue */
+   struct chennel_lock downl;  /* protects `chan', file.xq dequeue */
struct ppp  *ppp;   /* ppp unit we're connected to */
struct net  *chan_net;  /* the net channel belongs to */
struct list_head clist; /* link in list of channels per unit */
@@ -1597,7 +1620,7 @@ ppp_push(struct ppp *ppp)
list = list->next;
pch = list_entry(list, struct channel, clist);
 
-   spin_lock_bh(>downl);
+   PPP_CHANNEL_LOCK_BH(pch->downl);
if (pch->chan) {
if (pch->chan->ops->start_xmit(pch->chan, skb))
ppp->xmit_pending = NULL;
@@ -1606,7 +1629,7 @@ ppp_push(struct ppp *ppp)
kfree_skb(skb);
ppp->xmit_pending = NULL;
}
-   spin_unlock_bh(>downl);
+   PPP_CHANNEL_UNLOCK_BH(pch->downl);
return;
}
 
@@ -1736,7 +1759,7 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff 
*skb)
}
 
/* check the channel's mtu and whether it is still attached. */
-   spin_lock_bh(>downl);
+   PPP_CHANNEL_LOCK_BH(pch->downl);
if (pch->chan == NULL) {
/* can't use this channel, it's being deregistered */
if (pch->speed == 0)
@@ -1744,7 +1767,7 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff 
*skb)
else
totspeed -= pch->speed;
 
-   spin_unlock_bh(>downl);
+   

[PATCH v1 1/1] pptp: Refactor the struct and macros of PPTP codes

2016-08-12 Thread fgao
From: Gao Feng 

1. Use struct gre_base_hdr directly in pptp_gre_header instead of
duplicated members;
2. Use existing macros like GRE_KEY, GRE_SEQ, and so on instead of
duplicated macros defined by PPTP;
3. Add new macros like GRE_IS_ACK/SEQ and so on instead of
PPTP_GRE_IS_A/S and so on;

Signed-off-by: Gao Feng 
---
 v1: Initial patch

 drivers/net/ppp/pptp.c | 28 +---
 include/net/pptp.h | 19 +--
 include/uapi/linux/if_tunnel.h | 12 ++--
 3 files changed, 24 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index 3e68dbc..1951b10 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -206,16 +206,14 @@ static int pptp_xmit(struct ppp_channel *chan, struct 
sk_buff *skb)
skb_push(skb, header_len);
hdr = (struct pptp_gre_header *)(skb->data);
 
-   hdr->flags   = PPTP_GRE_FLAG_K;
-   hdr->ver = PPTP_GRE_VER;
-   hdr->protocol= htons(PPTP_GRE_PROTO);
-   hdr->call_id = htons(opt->dst_addr.call_id);
+   hdr->gre_hd.flags = GRE_KEY | GRE_VERSION_1 | GRE_SEQ;
+   hdr->gre_hd.protocol = GRE_PROTO_PPP;
+   hdr->call_id = htons(opt->dst_addr.call_id);
 
-   hdr->flags  |= PPTP_GRE_FLAG_S;
-   hdr->seq = htonl(++opt->seq_sent);
+   hdr->seq = htonl(++opt->seq_sent);
if (opt->ack_sent != seq_recv)  {
/* send ack with this message */
-   hdr->ver |= PPTP_GRE_FLAG_A;
+   hdr->gre_hd.flags |= GRE_ACK;
hdr->ack  = htonl(seq_recv);
opt->ack_sent = seq_recv;
}
@@ -278,7 +276,7 @@ static int pptp_rcv_core(struct sock *sk, struct sk_buff 
*skb)
headersize  = sizeof(*header);
 
/* test if acknowledgement present */
-   if (PPTP_GRE_IS_A(header->ver)) {
+   if (GRE_IS_ACK(header->gre_hd.flags)) {
__u32 ack;
 
if (!pskb_may_pull(skb, headersize))
@@ -286,7 +284,7 @@ static int pptp_rcv_core(struct sock *sk, struct sk_buff 
*skb)
header = (struct pptp_gre_header *)(skb->data);
 
/* ack in different place if S = 0 */
-   ack = PPTP_GRE_IS_S(header->flags) ? header->ack : header->seq;
+   ack = GRE_IS_SEQ(header->gre_hd.flags) ? header->ack : 
header->seq;
 
ack = ntohl(ack);
 
@@ -299,7 +297,7 @@ static int pptp_rcv_core(struct sock *sk, struct sk_buff 
*skb)
headersize -= sizeof(header->ack);
}
/* test if payload present */
-   if (!PPTP_GRE_IS_S(header->flags))
+   if (!GRE_IS_SEQ(header->gre_hd.flags))
goto drop;
 
payload_len = ntohs(header->payload_len);
@@ -360,11 +358,11 @@ static int pptp_rcv(struct sk_buff *skb)
 
header = (struct pptp_gre_header *)skb->data;
 
-   if (ntohs(header->protocol) != PPTP_GRE_PROTO || /* PPTP-GRE protocol 
for PPTP */
-   PPTP_GRE_IS_C(header->flags) ||/* flag C should 
be clear */
-   PPTP_GRE_IS_R(header->flags) ||/* flag R should 
be clear */
-   !PPTP_GRE_IS_K(header->flags) ||   /* flag K should 
be set */
-   (header->flags&0xF) != 0)  /* routing and 
recursion ctrl = 0 */
+   if (header->gre_hd.protocol != GRE_PROTO_PPP || /* PPTP-GRE protocol 
for PPTP */
+   GRE_IS_CSUM(header->gre_hd.flags) ||/* flag CSUM should be 
clear */
+   GRE_IS_ROUTING(header->gre_hd.flags) || /* flag ROUTING should 
be clear */
+   !GRE_IS_KEY(header->gre_hd.flags) ||/* flag KEY should be 
set */
+   (header->gre_hd.flags & GRE_FLAGS)) /* flag Recursion Ctrl 
should be clear */
/* if invalid, discard this packet */
goto drop;
 
diff --git a/include/net/pptp.h b/include/net/pptp.h
index 301d3e2..92e9f1f 100644
--- a/include/net/pptp.h
+++ b/include/net/pptp.h
@@ -10,26 +10,9 @@
curseq) & 0xff00) == 0) &&\
(((lastseq) & 0xff00) == 0xff00))
 
-#define PPTP_GRE_PROTO  0x880B
-#define PPTP_GRE_VER0x1
-
-#define PPTP_GRE_FLAG_C 0x80
-#define PPTP_GRE_FLAG_R 0x40
-#define PPTP_GRE_FLAG_K 0x20
-#define PPTP_GRE_FLAG_S 0x10
-#define PPTP_GRE_FLAG_A 0x80
-
-#define PPTP_GRE_IS_C(f) ((f)_GRE_FLAG_C)
-#define PPTP_GRE_IS_R(f) ((f)_GRE_FLAG_R)
-#define PPTP_GRE_IS_K(f) ((f)_GRE_FLAG_K)
-#define PPTP_GRE_IS_S(f) ((f)_GRE_FLAG_S)
-#define PPTP_GRE_IS_A(f) ((f)_GRE_FLAG_A)
-
 #define PPTP_HEADER_OVERHEAD (2+sizeof(struct pptp_gre_header))
 struct pptp_gre_header {
-   u8  flags;
-   u8  ver;
-   __be16 protocol;
+   struct gre_base_hdr gre_hd;
__be16 payload_len;
__be16 call_id;
__be32 seq;
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 60dbb20..361b9f0 100644
--- 

[PATCH v6 1/1] rps: Inspect PPTP encapsulated by GRE to get flow hash

2016-08-08 Thread fgao
From: Gao Feng 

PPTP is encapsulated in a GRE header whose GRE_VERSION field must
contain the value one. But the current GRE RPS code requires GRE_VERSION
to be zero, so RPS does not work for PPTP traffic.

In my test environment there are four MIPS cores, and all traffic is
carried over PPTP. As a result, only one core is 100% busy while the
other three cores are mostly idle. After this patch, the load is well
balanced across all four cores.

Signed-off-by: Gao Feng 
---
 v6: 1) Keep the original v4 style of struct gre_base_hdr and gre_full_hdr;
 2) Use __cpu_to_be32 instead of htonl;
 v5: 1) Make the fixed header of gre_full_hdr an unnamed struct;
 2) Create macro GRE_PPTP_KEY_MASK;
 v4: 1) Define struct gre_full_hdr, and use sizeof on its members directly;
 2) Move the version and routing checks ahead;
 3) Only check the ack flag for PPTP in GRE;
 v3: 1) Move the struct pptp_gre_header definition into new file pptp.h;
 2) Use sizeof on GRE and PPTP types instead of literal values;
 3) Remove the strict flag check for PPTP, for robustness;
 4) Consolidate the code again;
 v2: Update according to Tom and Philp's advice.
 1) Consolidate the code with the GRE version 0 path;
 2) Use PPP_PROTOCOL to get the PPP protocol;
 3) Set the FLOW_DIS_ENCAPSULATION flag;
 v1: Initial Patch

 drivers/net/ppp/pptp.c |  36 +
 include/net/gre.h  |  10 +++-
 include/net/pptp.h |  40 +++
 include/uapi/linux/if_tunnel.h |   7 ++-
 net/core/flow_dissector.c  | 113 -
 5 files changed, 135 insertions(+), 71 deletions(-)
 create mode 100644 include/net/pptp.h

diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index ae0905e..3e68dbc 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -37,6 +37,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -53,41 +54,6 @@ static struct proto pptp_sk_proto __read_mostly;
 static const struct ppp_channel_ops pptp_chan_ops;
 static const struct proto_ops pptp_ops;
 
-#define PPP_LCP_ECHOREQ 0x09
-#define PPP_LCP_ECHOREP 0x0A
-#define SC_RCV_BITS(SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP)
-
-#define MISSING_WINDOW 20
-#define WRAPPED(curseq, lastseq)\
-   curseq) & 0xff00) == 0) &&\
-   (((lastseq) & 0xff00) == 0xff00))
-
-#define PPTP_GRE_PROTO  0x880B
-#define PPTP_GRE_VER0x1
-
-#define PPTP_GRE_FLAG_C0x80
-#define PPTP_GRE_FLAG_R0x40
-#define PPTP_GRE_FLAG_K0x20
-#define PPTP_GRE_FLAG_S0x10
-#define PPTP_GRE_FLAG_A0x80
-
-#define PPTP_GRE_IS_C(f) ((f)_GRE_FLAG_C)
-#define PPTP_GRE_IS_R(f) ((f)_GRE_FLAG_R)
-#define PPTP_GRE_IS_K(f) ((f)_GRE_FLAG_K)
-#define PPTP_GRE_IS_S(f) ((f)_GRE_FLAG_S)
-#define PPTP_GRE_IS_A(f) ((f)_GRE_FLAG_A)
-
-#define PPTP_HEADER_OVERHEAD (2+sizeof(struct pptp_gre_header))
-struct pptp_gre_header {
-   u8  flags;
-   u8  ver;
-   __be16 protocol;
-   __be16 payload_len;
-   __be16 call_id;
-   __be32 seq;
-   __be32 ack;
-} __packed;
-
 static struct pppox_sock *lookup_chan(u16 call_id, __be32 s_addr)
 {
struct pppox_sock *sock;
diff --git a/include/net/gre.h b/include/net/gre.h
index 7a54a31..8962e1e 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -7,7 +7,15 @@
 struct gre_base_hdr {
__be16 flags;
__be16 protocol;
-};
+} __packed;
+
+struct gre_full_hdr {
+   struct gre_base_hdr fixed_header;
+   __be16 csum;
+   __be16 reserved1;
+   __be32 key;
+   __be32 seq;
+} __packed;
 #define GRE_HEADER_SECTION 4
 
 #define GREPROTO_CISCO 0
diff --git a/include/net/pptp.h b/include/net/pptp.h
new file mode 100644
index 000..301d3e2
--- /dev/null
+++ b/include/net/pptp.h
@@ -0,0 +1,40 @@
+#ifndef _NET_PPTP_H
+#define _NET_PPTP_H
+
+#define PPP_LCP_ECHOREQ 0x09
+#define PPP_LCP_ECHOREP 0x0A
+#define SC_RCV_BITS (SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP)
+
+#define MISSING_WINDOW 20
+#define WRAPPED(curseq, lastseq)\
+   curseq) & 0xff00) == 0) &&\
+   (((lastseq) & 0xff00) == 0xff00))
+
+#define PPTP_GRE_PROTO  0x880B
+#define PPTP_GRE_VER0x1
+
+#define PPTP_GRE_FLAG_C 0x80
+#define PPTP_GRE_FLAG_R 0x40
+#define PPTP_GRE_FLAG_K 0x20
+#define PPTP_GRE_FLAG_S 0x10
+#define PPTP_GRE_FLAG_A 0x80
+
+#define PPTP_GRE_IS_C(f) ((f)_GRE_FLAG_C)
+#define PPTP_GRE_IS_R(f) ((f)_GRE_FLAG_R)
+#define PPTP_GRE_IS_K(f) ((f)_GRE_FLAG_K)
+#define PPTP_GRE_IS_S(f) ((f)_GRE_FLAG_S)
+#define PPTP_GRE_IS_A(f) ((f)_GRE_FLAG_A)
+
+#define PPTP_HEADER_OVERHEAD (2+sizeof(struct pptp_gre_header))
+struct pptp_gre_header {
+   u8  flags;
+   u8  ver;
+   __be16 protocol;
+   __be16 payload_len;
+   __be16 call_id;
+   __be32 seq;
+   __be32 ack;
+} __packed;
+
+
+#endif
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 1046f55..60dbb20 100644
--- a/include/uapi/linux/if_tunnel.h
+++ 

[PATCH v5 1/1] rps: Inspect PPTP encapsulated by GRE to get flow hash

2016-08-08 Thread fgao
From: Gao Feng 

PPTP is encapsulated in a GRE header whose GRE_VERSION field must
contain the value one. But the current GRE RPS code requires GRE_VERSION
to be zero, so RPS does not work for PPTP traffic.

In my test environment there are four MIPS cores, and all traffic is
carried over PPTP. As a result, only one core is 100% busy while the
other three cores are mostly idle. After this patch, the load is well
balanced across all four cores.

Signed-off-by: Gao Feng 
---
 v5: 1) Make the fixed header of gre_full_hdr an unnamed struct;
 2) Create macro GRE_PPTP_KEY_MASK;
 v4: 1) Define struct gre_full_hdr, and use sizeof on its members directly;
 2) Move the version and routing checks ahead;
 3) Only check the ack flag for PPTP in GRE;
 v3: 1) Move the struct pptp_gre_header definition into new file pptp.h;
 2) Use sizeof on GRE and PPTP types instead of literal values;
 3) Remove the strict flag check for PPTP, for robustness;
 4) Consolidate the code again;
 v2: Update according to Tom and Philp's advice.
 1) Consolidate the code with the GRE version 0 path;
 2) Use PPP_PROTOCOL to get the PPP protocol;
 3) Set the FLOW_DIS_ENCAPSULATION flag;
 v1: Initial Patch

 drivers/net/ppp/pptp.c |  36 +
 include/net/gre.h  |  13 -
 include/net/pptp.h |  40 +++
 include/uapi/linux/if_tunnel.h |   7 ++-
 net/core/flow_dissector.c  | 113 -
 5 files changed, 138 insertions(+), 71 deletions(-)
 create mode 100644 include/net/pptp.h

diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index ae0905e..3e68dbc 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -37,6 +37,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -53,41 +54,6 @@ static struct proto pptp_sk_proto __read_mostly;
 static const struct ppp_channel_ops pptp_chan_ops;
 static const struct proto_ops pptp_ops;
 
-#define PPP_LCP_ECHOREQ 0x09
-#define PPP_LCP_ECHOREP 0x0A
-#define SC_RCV_BITS(SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP)
-
-#define MISSING_WINDOW 20
-#define WRAPPED(curseq, lastseq)\
-   curseq) & 0xff00) == 0) &&\
-   (((lastseq) & 0xff00) == 0xff00))
-
-#define PPTP_GRE_PROTO  0x880B
-#define PPTP_GRE_VER0x1
-
-#define PPTP_GRE_FLAG_C0x80
-#define PPTP_GRE_FLAG_R0x40
-#define PPTP_GRE_FLAG_K0x20
-#define PPTP_GRE_FLAG_S0x10
-#define PPTP_GRE_FLAG_A0x80
-
-#define PPTP_GRE_IS_C(f) ((f)_GRE_FLAG_C)
-#define PPTP_GRE_IS_R(f) ((f)_GRE_FLAG_R)
-#define PPTP_GRE_IS_K(f) ((f)_GRE_FLAG_K)
-#define PPTP_GRE_IS_S(f) ((f)_GRE_FLAG_S)
-#define PPTP_GRE_IS_A(f) ((f)_GRE_FLAG_A)
-
-#define PPTP_HEADER_OVERHEAD (2+sizeof(struct pptp_gre_header))
-struct pptp_gre_header {
-   u8  flags;
-   u8  ver;
-   __be16 protocol;
-   __be16 payload_len;
-   __be16 call_id;
-   __be32 seq;
-   __be32 ack;
-} __packed;
-
 static struct pppox_sock *lookup_chan(u16 call_id, __be32 s_addr)
 {
struct pppox_sock *sock;
diff --git a/include/net/gre.h b/include/net/gre.h
index 7a54a31..6347f16 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -7,9 +7,20 @@
 struct gre_base_hdr {
__be16 flags;
__be16 protocol;
-};
+} __packed;
 #define GRE_HEADER_SECTION 4
 
+struct gre_full_hdr {
+   struct {
+   __be16 flags;
+   __be16 protocols;
+   };
+   __be16 csum;
+   __be16 reserved1;
+   __be32 key;
+   __be32 seq;
+} __packed;
+
 #define GREPROTO_CISCO 0
 #define GREPROTO_PPTP  1
 #define GREPROTO_MAX   2
diff --git a/include/net/pptp.h b/include/net/pptp.h
new file mode 100644
index 000..301d3e2
--- /dev/null
+++ b/include/net/pptp.h
@@ -0,0 +1,40 @@
+#ifndef _NET_PPTP_H
+#define _NET_PPTP_H
+
+#define PPP_LCP_ECHOREQ 0x09
+#define PPP_LCP_ECHOREP 0x0A
+#define SC_RCV_BITS (SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP)
+
+#define MISSING_WINDOW 20
+#define WRAPPED(curseq, lastseq)\
+   curseq) & 0xff00) == 0) &&\
+   (((lastseq) & 0xff00) == 0xff00))
+
+#define PPTP_GRE_PROTO  0x880B
+#define PPTP_GRE_VER0x1
+
+#define PPTP_GRE_FLAG_C 0x80
+#define PPTP_GRE_FLAG_R 0x40
+#define PPTP_GRE_FLAG_K 0x20
+#define PPTP_GRE_FLAG_S 0x10
+#define PPTP_GRE_FLAG_A 0x80
+
+#define PPTP_GRE_IS_C(f) ((f)_GRE_FLAG_C)
+#define PPTP_GRE_IS_R(f) ((f)_GRE_FLAG_R)
+#define PPTP_GRE_IS_K(f) ((f)_GRE_FLAG_K)
+#define PPTP_GRE_IS_S(f) ((f)_GRE_FLAG_S)
+#define PPTP_GRE_IS_A(f) ((f)_GRE_FLAG_A)
+
+#define PPTP_HEADER_OVERHEAD (2+sizeof(struct pptp_gre_header))
+struct pptp_gre_header {
+   u8  flags;
+   u8  ver;
+   __be16 protocol;
+   __be16 payload_len;
+   __be16 call_id;
+   __be32 seq;
+   __be32 ack;
+} __packed;
+
+
+#endif
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 1046f55..0c11918 100644
--- a/include/uapi/linux/if_tunnel.h
+++ 

[PATCH v4 1/1] rps: Inspect PPTP encapsulated by GRE to get flow hash

2016-08-04 Thread fgao
From: Gao Feng 

PPTP is encapsulated in a GRE header whose GRE_VERSION field must
contain the value one. But the current GRE RPS code requires GRE_VERSION
to be zero, so RPS does not work for PPTP traffic.

In my test environment there are four MIPS cores, and all traffic is
carried over PPTP. As a result, only one core is 100% busy while the
other three cores are mostly idle. After this patch, the load is well
balanced across all four cores.

Signed-off-by: Gao Feng 
---
 v4: 1) Define struct gre_full_hdr, and use sizeof on its members directly;
 2) Move the version and routing checks ahead;
 3) Only check the ack flag for PPTP in GRE;
 v3: 1) Move the struct pptp_gre_header definition into new file pptp.h;
 2) Use sizeof on GRE and PPTP types instead of literal values;
 3) Remove the strict flag check for PPTP, for robustness;
 4) Consolidate the code again;
 v2: Update according to Tom and Philp's advice.
 1) Consolidate the code with the GRE version 0 path;
 2) Use PPP_PROTOCOL to get the PPP protocol;
 3) Set the FLOW_DIS_ENCAPSULATION flag;
 v1: Initial Patch

 drivers/net/ppp/pptp.c |  36 +
 include/net/gre.h  |  10 +++-
 include/net/pptp.h |  40 +++
 include/uapi/linux/if_tunnel.h |   7 ++-
 net/core/flow_dissector.c  | 113 -
 5 files changed, 135 insertions(+), 71 deletions(-)
 create mode 100644 include/net/pptp.h

diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index ae0905e..3e68dbc 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -37,6 +37,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -53,41 +54,6 @@ static struct proto pptp_sk_proto __read_mostly;
 static const struct ppp_channel_ops pptp_chan_ops;
 static const struct proto_ops pptp_ops;
 
-#define PPP_LCP_ECHOREQ 0x09
-#define PPP_LCP_ECHOREP 0x0A
-#define SC_RCV_BITS(SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP)
-
-#define MISSING_WINDOW 20
-#define WRAPPED(curseq, lastseq)\
-   curseq) & 0xff00) == 0) &&\
-   (((lastseq) & 0xff00) == 0xff00))
-
-#define PPTP_GRE_PROTO  0x880B
-#define PPTP_GRE_VER0x1
-
-#define PPTP_GRE_FLAG_C0x80
-#define PPTP_GRE_FLAG_R0x40
-#define PPTP_GRE_FLAG_K0x20
-#define PPTP_GRE_FLAG_S0x10
-#define PPTP_GRE_FLAG_A0x80
-
-#define PPTP_GRE_IS_C(f) ((f)_GRE_FLAG_C)
-#define PPTP_GRE_IS_R(f) ((f)_GRE_FLAG_R)
-#define PPTP_GRE_IS_K(f) ((f)_GRE_FLAG_K)
-#define PPTP_GRE_IS_S(f) ((f)_GRE_FLAG_S)
-#define PPTP_GRE_IS_A(f) ((f)_GRE_FLAG_A)
-
-#define PPTP_HEADER_OVERHEAD (2+sizeof(struct pptp_gre_header))
-struct pptp_gre_header {
-   u8  flags;
-   u8  ver;
-   __be16 protocol;
-   __be16 payload_len;
-   __be16 call_id;
-   __be32 seq;
-   __be32 ack;
-} __packed;
-
 static struct pppox_sock *lookup_chan(u16 call_id, __be32 s_addr)
 {
struct pppox_sock *sock;
diff --git a/include/net/gre.h b/include/net/gre.h
index 7a54a31..c469dcc 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -7,9 +7,17 @@
 struct gre_base_hdr {
__be16 flags;
__be16 protocol;
-};
+} __packed;
 #define GRE_HEADER_SECTION 4
 
+struct gre_full_hdr {
+   struct gre_base_hdr fixed_header;
+   __be16 csum;
+   __be16 reserved1;
+   __be32 key;
+   __be32 seq;
+} __packed;
+
 #define GREPROTO_CISCO 0
 #define GREPROTO_PPTP  1
 #define GREPROTO_MAX   2
diff --git a/include/net/pptp.h b/include/net/pptp.h
new file mode 100644
index 000..301d3e2
--- /dev/null
+++ b/include/net/pptp.h
@@ -0,0 +1,40 @@
+#ifndef _NET_PPTP_H
+#define _NET_PPTP_H
+
+#define PPP_LCP_ECHOREQ 0x09
+#define PPP_LCP_ECHOREP 0x0A
+#define SC_RCV_BITS (SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP)
+
+#define MISSING_WINDOW 20
+#define WRAPPED(curseq, lastseq)\
+   curseq) & 0xff00) == 0) &&\
+   (((lastseq) & 0xff00) == 0xff00))
+
+#define PPTP_GRE_PROTO  0x880B
+#define PPTP_GRE_VER0x1
+
+#define PPTP_GRE_FLAG_C 0x80
+#define PPTP_GRE_FLAG_R 0x40
+#define PPTP_GRE_FLAG_K 0x20
+#define PPTP_GRE_FLAG_S 0x10
+#define PPTP_GRE_FLAG_A 0x80
+
+#define PPTP_GRE_IS_C(f) ((f)_GRE_FLAG_C)
+#define PPTP_GRE_IS_R(f) ((f)_GRE_FLAG_R)
+#define PPTP_GRE_IS_K(f) ((f)_GRE_FLAG_K)
+#define PPTP_GRE_IS_S(f) ((f)_GRE_FLAG_S)
+#define PPTP_GRE_IS_A(f) ((f)_GRE_FLAG_A)
+
+#define PPTP_HEADER_OVERHEAD (2+sizeof(struct pptp_gre_header))
+struct pptp_gre_header {
+   u8  flags;
+   u8  ver;
+   __be16 protocol;
+   __be16 payload_len;
+   __be16 call_id;
+   __be32 seq;
+   __be32 ack;
+} __packed;
+
+
+#endif
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 1046f55..7d889db 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -24,9 +24,14 @@
 #define GRE_SEQ__cpu_to_be16(0x1000)
 #define GRE_STRICT __cpu_to_be16(0x0800)
 

[PATCH v3 1/1] rps: Inspect PPTP encapsulated by GRE to get flow hash

2016-08-03 Thread fgao
From: Gao Feng 

PPTP is encapsulated in a GRE header whose GRE_VERSION field must
contain the value one. But the current GRE RPS code requires GRE_VERSION
to be zero, so RPS does not work for PPTP traffic.

In my test environment there are four MIPS cores, and all traffic is
carried over PPTP. As a result, only one core is 100% busy while the
other three cores are mostly idle. After this patch, the load is well
balanced across all four cores.

Signed-off-by: Gao Feng 
---
 v3: 1) Move the struct pptp_gre_header definition into new file pptp.h;
 2) Use sizeof on GRE and PPTP types instead of literal values;
 3) Remove the strict flag check for PPTP, for robustness;
 4) Consolidate the code again;
 v2: Update according to Tom and Philp's advice.
 1) Consolidate the code with the GRE version 0 path;
 2) Use PPP_PROTOCOL to get the PPP protocol;
 3) Set the FLOW_DIS_ENCAPSULATION flag;
 v1: Initial patch

 drivers/net/ppp/pptp.c |  36 +--
 include/net/pptp.h |  40 
 include/uapi/linux/if_tunnel.h |   7 +-
 net/core/flow_dissector.c  | 141 +
 4 files changed, 134 insertions(+), 90 deletions(-)
 create mode 100644 include/net/pptp.h

diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index ae0905e..3e68dbc 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -37,6 +37,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -53,41 +54,6 @@ static struct proto pptp_sk_proto __read_mostly;
 static const struct ppp_channel_ops pptp_chan_ops;
 static const struct proto_ops pptp_ops;
 
-#define PPP_LCP_ECHOREQ 0x09
-#define PPP_LCP_ECHOREP 0x0A
-#define SC_RCV_BITS(SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP)
-
-#define MISSING_WINDOW 20
-#define WRAPPED(curseq, lastseq)\
-   curseq) & 0xff00) == 0) &&\
-   (((lastseq) & 0xff00) == 0xff00))
-
-#define PPTP_GRE_PROTO  0x880B
-#define PPTP_GRE_VER0x1
-
-#define PPTP_GRE_FLAG_C0x80
-#define PPTP_GRE_FLAG_R0x40
-#define PPTP_GRE_FLAG_K0x20
-#define PPTP_GRE_FLAG_S0x10
-#define PPTP_GRE_FLAG_A0x80
-
-#define PPTP_GRE_IS_C(f) ((f)_GRE_FLAG_C)
-#define PPTP_GRE_IS_R(f) ((f)_GRE_FLAG_R)
-#define PPTP_GRE_IS_K(f) ((f)_GRE_FLAG_K)
-#define PPTP_GRE_IS_S(f) ((f)_GRE_FLAG_S)
-#define PPTP_GRE_IS_A(f) ((f)_GRE_FLAG_A)
-
-#define PPTP_HEADER_OVERHEAD (2+sizeof(struct pptp_gre_header))
-struct pptp_gre_header {
-   u8  flags;
-   u8  ver;
-   __be16 protocol;
-   __be16 payload_len;
-   __be16 call_id;
-   __be32 seq;
-   __be32 ack;
-} __packed;
-
 static struct pppox_sock *lookup_chan(u16 call_id, __be32 s_addr)
 {
struct pppox_sock *sock;
diff --git a/include/net/pptp.h b/include/net/pptp.h
new file mode 100644
index 000..301d3e2
--- /dev/null
+++ b/include/net/pptp.h
@@ -0,0 +1,40 @@
+#ifndef _NET_PPTP_H
+#define _NET_PPTP_H
+
+#define PPP_LCP_ECHOREQ 0x09
+#define PPP_LCP_ECHOREP 0x0A
+#define SC_RCV_BITS (SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP)
+
+#define MISSING_WINDOW 20
+#define WRAPPED(curseq, lastseq)\
+   curseq) & 0xff00) == 0) &&\
+   (((lastseq) & 0xff00) == 0xff00))
+
+#define PPTP_GRE_PROTO  0x880B
+#define PPTP_GRE_VER0x1
+
+#define PPTP_GRE_FLAG_C 0x80
+#define PPTP_GRE_FLAG_R 0x40
+#define PPTP_GRE_FLAG_K 0x20
+#define PPTP_GRE_FLAG_S 0x10
+#define PPTP_GRE_FLAG_A 0x80
+
+#define PPTP_GRE_IS_C(f) ((f)_GRE_FLAG_C)
+#define PPTP_GRE_IS_R(f) ((f)_GRE_FLAG_R)
+#define PPTP_GRE_IS_K(f) ((f)_GRE_FLAG_K)
+#define PPTP_GRE_IS_S(f) ((f)_GRE_FLAG_S)
+#define PPTP_GRE_IS_A(f) ((f)_GRE_FLAG_A)
+
+#define PPTP_HEADER_OVERHEAD (2+sizeof(struct pptp_gre_header))
+struct pptp_gre_header {
+   u8  flags;
+   u8  ver;
+   __be16 protocol;
+   __be16 payload_len;
+   __be16 call_id;
+   __be32 seq;
+   __be32 ack;
+} __packed;
+
+
+#endif
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 1046f55..7d889db 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -24,9 +24,14 @@
 #define GRE_SEQ__cpu_to_be16(0x1000)
 #define GRE_STRICT __cpu_to_be16(0x0800)
 #define GRE_REC__cpu_to_be16(0x0700)
-#define GRE_FLAGS  __cpu_to_be16(0x00F8)
+#define GRE_ACK__cpu_to_be16(0x0080)
+#define GRE_FLAGS  __cpu_to_be16(0x0078)
 #define GRE_VERSION__cpu_to_be16(0x0007)
 
+#define GRE_VERSION_1  __cpu_to_be16(0x0001)
+#define GRE_PROTO_PPP  __cpu_to_be16(0x880b)
+
+
 struct ip_tunnel_parm {
charname[IFNAMSIZ];
int link;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 61ad43f..52b7c3c 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -6,6 +6,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -338,71 +340,102 @@ mpls:
 

[PATCH 1/1] rps: Inspect PPTP encapsulated by GRE to get flow hash

2016-07-28 Thread fgao
From: Gao Feng 

PPTP is encapsulated in a GRE header whose GRE_VERSION field must
contain the value one. But the current GRE RPS code requires GRE_VERSION
to be zero, so RPS does not work for PPTP traffic.

In my test environment there are four MIPS cores, and all traffic is
carried over PPTP. As a result, only one core is 100% busy while the
other three cores are mostly idle. After this patch, the load is well
balanced across all four cores.

Signed-off-by: Gao Feng 
---
 v2: Update according to Tom and Philp's advice.
 1) Consolidate the code with the GRE version 0 path;
 2) Use PPP_PROTOCOL to get the PPP protocol;
 3) Set the FLOW_DIS_ENCAPSULATION flag;
 v1: Initial patch 

 include/uapi/linux/if_tunnel.h |   5 +-
 net/core/flow_dissector.c  | 146 ++---
 2 files changed, 97 insertions(+), 54 deletions(-)

diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 1046f55..dda4e4b 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -24,9 +24,12 @@
 #define GRE_SEQ__cpu_to_be16(0x1000)
 #define GRE_STRICT __cpu_to_be16(0x0800)
 #define GRE_REC__cpu_to_be16(0x0700)
-#define GRE_FLAGS  __cpu_to_be16(0x00F8)
+#define GRE_ACK__cpu_to_be16(0x0080)
+#define GRE_FLAGS  __cpu_to_be16(0x0078)
 #define GRE_VERSION__cpu_to_be16(0x0007)
 
+#define GRE_PROTO_PPP  __cpu_to_be16(0x880b)
+
 struct ip_tunnel_parm {
charname[IFNAMSIZ];
int link;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 61ad43f..33e957b 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -346,63 +346,103 @@ ip_proto_again:
hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, 
hlen, &_hdr);
if (!hdr)
goto out_bad;
-   /*
-* Only look inside GRE if version zero and no
-* routing
-*/
-   if (hdr->flags & (GRE_VERSION | GRE_ROUTING))
-   break;
-
-   proto = hdr->proto;
-   nhoff += 4;
-   if (hdr->flags & GRE_CSUM)
-   nhoff += 4;
-   if (hdr->flags & GRE_KEY) {
-   const __be32 *keyid;
-   __be32 _keyid;
-
-   keyid = __skb_header_pointer(skb, nhoff, sizeof(_keyid),
-data, hlen, &_keyid);
 
-   if (!keyid)
-   goto out_bad;
+   /* Only look inside GRE without routing */
+   if (!(hdr->flags & GRE_ROUTING)) {
+   proto = hdr->proto;
+
+   if (hdr->flags & GRE_VERSION) {
+   /* It should be the PPTP in GRE */
+   u8 _ppp_hdr[PPP_HDRLEN];
+   u8 *ppp_hdr;
+   int offset = 0;
+
+   /* Check the flags according to RFC 2637*/
+   if (!(proto == GRE_PROTO_PPP && (hdr->flags & 
GRE_KEY) &&
+ !(hdr->flags & (GRE_CSUM | GRE_STRICT | 
GRE_REC | GRE_FLAGS {
+   break;
+   }
+
+   /* Skip GRE header */
+   offset += 4;
+   /* Skip payload length and call id */
+   offset += 4;
+
+   if (hdr->flags & GRE_SEQ)
+   offset += 4;
+
+   if (hdr->flags & GRE_ACK)
+   offset += 4;
+
+   ppp_hdr = skb_header_pointer(skb, nhoff + 
offset, sizeof(_ppp_hdr), _ppp_hdr);
+   if (!ppp_hdr)
+   goto out_bad;
+   proto = PPP_PROTOCOL(ppp_hdr);
+   if (proto == PPP_IP) {
+   nhoff += (PPP_HDRLEN + offset);
+   proto = htons(ETH_P_IP);
+   key_control->flags |= 
FLOW_DIS_ENCAPSULATION;
+   goto again;
+   } else if (proto == PPP_IPV6) {
+   nhoff += (PPP_HDRLEN + offset);
+   proto = htons(ETH_P_IPV6);
+   key_control->flags |= 
FLOW_DIS_ENCAPSULATION;
+   goto again;
+   }
+   } else {
+   /* Original GRE */
+  

[PATCH 1/1] rps: Inspect PPTP encapsulated by GRE to get flow hash

2016-07-27 Thread fgao
From: Gao Feng 

PPTP is encapsulated in a GRE header whose GRE_VERSION field must
contain the value one. But the current GRE RPS code requires GRE_VERSION
to be zero, so RPS does not work for PPTP traffic.

In my test environment there are four MIPS cores, and all traffic is
carried over PPTP. As a result, only one core is 100% busy while the
other three cores are mostly idle. After this patch, the load is well
balanced across all four cores.

Signed-off-by: Gao Feng 
---
 v1: Initial patch

 include/uapi/linux/if_tunnel.h |   5 +-
 net/core/flow_dissector.c  | 138 ++---
 2 files changed, 92 insertions(+), 51 deletions(-)

diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 1046f55..dda4e4b 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -24,9 +24,12 @@
 #define GRE_SEQ__cpu_to_be16(0x1000)
 #define GRE_STRICT __cpu_to_be16(0x0800)
 #define GRE_REC__cpu_to_be16(0x0700)
-#define GRE_FLAGS  __cpu_to_be16(0x00F8)
+#define GRE_ACK__cpu_to_be16(0x0080)
+#define GRE_FLAGS  __cpu_to_be16(0x0078)
 #define GRE_VERSION__cpu_to_be16(0x0007)
 
+#define GRE_PROTO_PPP  __cpu_to_be16(0x880b)
+
 struct ip_tunnel_parm {
charname[IFNAMSIZ];
int link;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 61ad43f..d95e060 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -346,63 +346,101 @@ ip_proto_again:
hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, 
hlen, &_hdr);
if (!hdr)
goto out_bad;
-   /*
-* Only look inside GRE if version zero and no
-* routing
-*/
-   if (hdr->flags & (GRE_VERSION | GRE_ROUTING))
-   break;
 
-   proto = hdr->proto;
-   nhoff += 4;
-   if (hdr->flags & GRE_CSUM)
+   /*
+   * Only look inside GRE if version zero and no
+   * routing
+   */
+   if (!(hdr->flags & (GRE_VERSION | GRE_ROUTING))) {
+   proto = hdr->proto;
nhoff += 4;
-   if (hdr->flags & GRE_KEY) {
-   const __be32 *keyid;
-   __be32 _keyid;
+   if (hdr->flags & GRE_CSUM)
+   nhoff += 4;
+   if (hdr->flags & GRE_KEY) {
+   const __be32 *keyid;
+   __be32 _keyid;
+
+   keyid = __skb_header_pointer(skb, nhoff, 
sizeof(_keyid),
+data, hlen, 
&_keyid);
+
+   if (!keyid)
+   goto out_bad;
+
+   if (dissector_uses_key(flow_dissector,
+  
FLOW_DISSECTOR_KEY_GRE_KEYID)) {
+   key_keyid = 
skb_flow_dissector_target(flow_dissector,
+ 
FLOW_DISSECTOR_KEY_GRE_KEYID,
+ 
target_container);
+   key_keyid->keyid = *keyid;
+   }
+   nhoff += 4;
+   }
+   if (hdr->flags & GRE_SEQ)
+   nhoff += 4;
+   if (proto == htons(ETH_P_TEB)) {
+   const struct ethhdr *eth;
+   struct ethhdr _eth;
+
+   eth = __skb_header_pointer(skb, nhoff,
+  sizeof(_eth),
+  data, hlen, &_eth);
+   if (!eth)
+   goto out_bad;
+   proto = eth->h_proto;
+   nhoff += sizeof(*eth);
+
+   /* Cap headers that we access via pointers at 
the
+* end of the Ethernet header as our maximum 
alignment
+* at that point is only 2 bytes.
+*/
+   if (NET_IP_ALIGN)
+   hlen = nhoff;
+   }
 
-   keyid = __skb_header_pointer(skb, nhoff, sizeof(_keyid),
-data, hlen, &_keyid);
+   key_control->flags |= FLOW_DIS_ENCAPSULATION;
+   if (flags 

  1   2   >