from:"Haishuang Yan"

[PATCH] geneve: fix tx_errors statistics

2016-06-21 Thread Haishuang Yan

tx_errors represents the sum of all errors encountered while transmitting
packets.

Signed-off-by: Haishuang Yan 
---
 drivers/net/geneve.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index aa61708..72b2f1c 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -922,8 +922,8 @@ tx_error:
dev->stats.collisions++;
else if (err == -ENETUNREACH)
dev->stats.tx_carrier_errors++;
-   else
-   dev->stats.tx_errors++;
+
+   dev->stats.tx_errors++;
return NETDEV_TX_OK;
 }
 
@@ -1012,8 +1012,8 @@ tx_error:
dev->stats.collisions++;
else if (err == -ENETUNREACH)
dev->stats.tx_carrier_errors++;
-   else
-   dev->stats.tx_errors++;
+
+   dev->stats.tx_errors++;
return NETDEV_TX_OK;
 }
 #endif
-- 
1.8.3.1

[PATCH 1/2] ip6_gre: Fix get_size calculation for gre6 tunnel

2016-05-11 Thread Haishuang Yan

Do not include attribute IFLA_GRE_TOS.

Signed-off-by: Haishuang Yan 
---
 net/ipv6/ip6_gre.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index ee62ec4..3c25fe6 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1394,8 +1394,6 @@ static size_t ip6gre_get_size(const struct net_device 
*dev)
nla_total_size(sizeof(struct in6_addr)) +
/* IFLA_GRE_TTL */
nla_total_size(1) +
-   /* IFLA_GRE_TOS */
-   nla_total_size(1) +
/* IFLA_GRE_ENCAP_LIMIT */
nla_total_size(1) +
/* IFLA_GRE_FLOWINFO */
@@ -1420,7 +1418,6 @@ static int ip6gre_fill_info(struct sk_buff *skb, const 
struct net_device *dev)
nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) ||
nla_put_in6_addr(skb, IFLA_GRE_REMOTE, &p->raddr) ||
nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) ||
-   /*nla_put_u8(skb, IFLA_GRE_TOS, t->priority) ||*/
nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) ||
nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) ||
nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags))
-- 
1.8.3.1

[PATCH 2/2] gre: Fix wrong tpi->proto in WCCP

2016-05-11 Thread Haishuang Yan

When dealing with WCCP in a gre6 tunnel, it sets the wrong tpi->proto,
that is, ETH_P_IP instead of ETH_P_IPV6 for the encapsulated traffic.

Signed-off-by: Haishuang Yan 
---
 include/net/gre.h| 2 +-
 net/ipv4/gre_demux.c | 6 +++---
 net/ipv4/ip_gre.c| 4 ++--
 net/ipv6/ip6_gre.c   | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/net/gre.h b/include/net/gre.h
index a14093c..5dce30a 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -26,7 +26,7 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 
version);
 struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
   u8 name_assign_type);
 int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
-bool *csum_err);
+bool *csum_err, __be16 proto);
 
 static inline int gre_calc_hlen(__be16 o_flags)
 {
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index d78e2ee..4c39f4f 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -62,7 +62,7 @@ EXPORT_SYMBOL_GPL(gre_del_protocol);
 
 /* Fills in tpi and returns header length to be pulled. */
 int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
-bool *csum_err)
+bool *csum_err, __be16 proto)
 {
const struct gre_base_hdr *greh;
__be32 *options;
@@ -109,11 +109,11 @@ int gre_parse_header(struct sk_buff *skb, struct 
tnl_ptk_info *tpi,
tpi->seq = 0;
}
/* WCCP version 1 and 2 protocol decoding.
-* - Change protocol to IP
+* - Change protocol to IPv4/IPv6
 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
 */
if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
-   tpi->proto = htons(ETH_P_IP);
+   tpi->proto = proto;
if ((*(u8 *)options & 0xF0) != 0x40)
hdr_len += 4;
}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 2b267e7..aaeb478 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -222,7 +222,7 @@ static void gre_err(struct sk_buff *skb, u32 info)
struct tnl_ptk_info tpi;
bool csum_err = false;
 
-   if (gre_parse_header(skb, &tpi, &csum_err) < 0) {
+   if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP)) < 0) {
if (!csum_err)  /* ignore csum errors. */
return;
}
@@ -335,7 +335,7 @@ static int gre_rcv(struct sk_buff *skb)
}
 #endif
 
-   hdr_len = gre_parse_header(skb, &tpi, &csum_err);
+   hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP));
if (hdr_len < 0)
goto drop;
 
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 3c25fe6..4541fa5 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -468,7 +468,7 @@ static int gre_rcv(struct sk_buff *skb)
bool csum_err = false;
int hdr_len;
 
-   hdr_len = gre_parse_header(skb, &tpi, &csum_err);
+   hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6));
if (hdr_len < 0)
goto drop;
 
-- 
1.8.3.1

[PATCH v2 2/2] ip6_gre: Set flowi6_proto as IPPROTO_GRE in xmit path.

2016-05-21 Thread Haishuang Yan

In gre6 xmit path, we are sending a GRE packet, so set fl6 proto
to IPPROTO_GRE properly.

Signed-off-by: Haishuang Yan 
---
Changes in v2:
  - Initialize the flow protocol in ip6gre_tnl_link_config
---
 net/ipv6/ip6_gre.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 8ea5a4d..e706621 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -712,6 +712,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int 
set_mtu)
fl6->daddr = p->raddr;
fl6->flowi6_oif = p->link;
fl6->flowlabel = 0;
+   fl6->flowi6_proto = IPPROTO_GRE;
 
if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
-- 
1.8.3.1

[PATCH v2 1/2] ip6_gre: Fix MTU setting for ip6gretap

2016-05-21 Thread Haishuang Yan

When creating an ip6gretap interface with an unreachable route,
the MTU is about 14 bytes larger than what is needed.

If the remote address is reachable:
ping6 2001:0:130::1 -c 2
PING 2001:0:130::1(2001:0:130::1) 56 data bytes
64 bytes from 2001:0:130::1: icmp_seq=1 ttl=64 time=1.46 ms
64 bytes from 2001:0:130::1: icmp_seq=2 ttl=64 time=81.1 ms

--- 2001:0:130::1 ping statistics ---
2 packets transmitted, 2 received, 0% packet loss, time 1001ms
rtt min/avg/max/mdev = 1.465/41.316/81.167/39.851 ms

ip link add ip6gretap1 type ip6gretap\
 local 2001:0:130::2 remote 2001:0:130::1
ip link show ip6gretap1
11: ip6gretap1@NONE:  mtu 1434 ...
link/ether c2:f3:f8:c1:2c:bf brd ff:ff:ff:ff:ff:ff

The MTU value 1434 is right. But if we delete the direct route:
ip -6 route del 2001:0:130::/64
ping6 2001:0:130::1 -c 2
connect: Network is unreachable
ip link add ip6gretap1 type ip6gretap\
 local 2001:0:130::2 remote 2001:0:130::1
ip link show ip6gretap1
12: ip6gretap1@NONE:  mtu 1448 ...
link/ether 7e:e1:d2:c4:06:5e brd ff:ff:ff:ff:ff:ff

Now, the MTU value 1448 is larger than what was needed.

The reason is that, if there is a reachable route, the
following code in ip6gre_tnl_link_config is run:

if (p->flags & IP6_TNL_F_CAP_XMIT) {
int strict = (ipv6_addr_type(&p->raddr) &
  (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));

struct rt6_info *rt = rt6_lookup(t->net,
 &p->raddr, &p->laddr,
 p->link, strict);

if (!rt)
return;

if (rt->dst.dev) {
dev->hard_header_len = rt->dst.dev->hard_header_len +
   t_hlen;

if (set_mtu) {
dev->mtu = rt->dst.dev->mtu - t_hlen;
if (!(t->parms.flags & 
IP6_TNL_F_IGN_ENCAP_LIMIT))
dev->mtu -= 8;
if (dev->type == ARPHRD_ETHER)
dev->mtu -= ETH_HLEN;

if (dev->mtu < IPV6_MIN_MTU)
dev->mtu = IPV6_MIN_MTU;
}
}
ip6_rt_put(rt);
}

Because rt is not NULL here, the ethernet header length is subtracted
from dev->mtu. But when rt is NULL, the function simply returns, so
dev->mtu is not updated correctly in this situation.

This patch first verifies that dev->type is ARPHRD_ETHER for the ip6gretap
interface, and then decreases the mtu as early as possible.

Signed-off-by: Haishuang Yan 
---
Changes in v2:
  - Make the commit message clearer.
---
 net/ipv6/ip6_gre.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 4541fa5..8ea5a4d 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1029,6 +1029,8 @@ static int ip6gre_tunnel_init_common(struct net_device 
*dev)
 
dev->hard_header_len = LL_MAX_HEADER + t_hlen;
dev->mtu = ETH_DATA_LEN - t_hlen;
+   if (dev->type == ARPHRD_ETHER)
+   dev->mtu -= ETH_HLEN;
if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
dev->mtu -= 8;
 
-- 
1.8.3.1

[PATCH] ip_tunnel: enclose a code block in macro IS_ENABLED(CONFIG_IPV6)

2016-05-22 Thread Haishuang Yan

For ipv6 case, enclose the code block in macro IS_ENABLED(CONFIG_IPV6).

Signed-off-by: Haishuang Yan 
---
 net/ipv4/ip_tunnel.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index a69ed94..5f3c8de 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -665,10 +665,13 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct 
net_device *dev,
if (skb->protocol == htons(ETH_P_IP)) {
tos = inner_iph->tos;
connected = false;
-   } else if (skb->protocol == htons(ETH_P_IPV6)) {
+   }
+#if IS_ENABLED(CONFIG_IPV6)
+   else if (skb->protocol == htons(ETH_P_IPV6)) {
tos = ipv6_get_dsfield((const struct ipv6hdr 
*)inner_iph);
connected = false;
}
+#endif
}
 
init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
-- 
1.8.3.1

[PATCH v2] ip_tunnel: enclose a code block in macro IS_ENABLED(CONFIG_IPV6)

2016-05-23 Thread Haishuang Yan

For ipv6 case, enclose the code block in macro IS_ENABLED(CONFIG_IPV6).

---
Changes in v2:
  - Place the "#if IS_ENABLED" block before the "} else if
(..) {" piece and the "#endif" before the closing brace and this
becomes much easier to look at.

Signed-off-by: Haishuang Yan 
---
 net/ipv4/ip_tunnel.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index a69ed94..4256349 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -665,9 +665,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device 
*dev,
if (skb->protocol == htons(ETH_P_IP)) {
tos = inner_iph->tos;
connected = false;
+#if IS_ENABLED(CONFIG_IPV6)
} else if (skb->protocol == htons(ETH_P_IPV6)) {
tos = ipv6_get_dsfield((const struct ipv6hdr 
*)inner_iph);
connected = false;
+#endif
}
}
 
-- 
1.8.3.1

[PATCH 2/2] ip6_gre: Set flowi6_proto as IPPROTO_GRE in xmit path.

2016-05-18 Thread Haishuang Yan

In gre6 xmit path, we are sending a GRE packet, so set fl6 proto
to IPPROTO_GRE properly.

Signed-off-by: Haishuang Yan 
---
 net/ipv6/ip6_gre.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 8ea5a4d..cc84098 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -541,6 +541,7 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, 
struct net_device *dev)
encap_limit = t->parms.encap_limit;
 
memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+   fl6.flowi6_proto = IPPROTO_GRE;
 
dsfield = ipv4_get_dsfield(iph);
 
@@ -595,6 +596,7 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, 
struct net_device *dev)
encap_limit = t->parms.encap_limit;
 
memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+   fl6.flowi6_proto = IPPROTO_GRE;
 
dsfield = ipv6_get_dsfield(ipv6h);
if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
-- 
1.8.3.1

[PATCH 1/2] ip6_gre: Fix MTU setting for ip6gretap

2016-05-18 Thread Haishuang Yan

When creating an ip6gretap interface with an unreachable route,
the MTU is about 14 bytes larger than what is needed.

If the remote address is reachable:
ping6 2001:0:130::1 -c 2
PING 2001:0:130::1(2001:0:130::1) 56 data bytes
64 bytes from 2001:0:130::1: icmp_seq=1 ttl=64 time=1.46 ms
64 bytes from 2001:0:130::1: icmp_seq=2 ttl=64 time=81.1 ms

--- 2001:0:130::1 ping statistics ---
2 packets transmitted, 2 received, 0% packet loss, time 1001ms
rtt min/avg/max/mdev = 1.465/41.316/81.167/39.851 ms

ip link add ip6gretap1 type ip6gretap\
 local 2001:0:130::2 remote 2001:0:130::1
ip link show ip6gretap1
11: ip6gretap1@NONE:  mtu 1434 ...
link/ether c2:f3:f8:c1:2c:bf brd ff:ff:ff:ff:ff:ff

The MTU value 1434 is right. But if we delete the direct route:
ip -6 route del 2001:0:130::/64
ping6 2001:0:130::1 -c 2
connect: Network is unreachable
ip link add ip6gretap1 type ip6gretap\
 local 2001:0:130::2 remote 2001:0:130::1
ip link show ip6gretap1
12: ip6gretap1@NONE:  mtu 1448 ...
link/ether 7e:e1:d2:c4:06:5e brd ff:ff:ff:ff:ff:ff

Now, the MTU value 1448 is larger than what was needed.

This patch fixes the issue in this situation.

Signed-off-by: Haishuang Yan 
---
 net/ipv6/ip6_gre.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 4541fa5..8ea5a4d 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1029,6 +1029,8 @@ static int ip6gre_tunnel_init_common(struct net_device 
*dev)
 
dev->hard_header_len = LL_MAX_HEADER + t_hlen;
dev->mtu = ETH_DATA_LEN - t_hlen;
+   if (dev->type == ARPHRD_ETHER)
+   dev->mtu -= ETH_HLEN;
if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
dev->mtu -= 8;
 
-- 
1.8.3.1

[PATCH] veth: Fix potential memory leak in veth_newlink

2016-04-27 Thread Haishuang Yan

Free the peer netdev when configuring the peer link or registering the dev fails.

Signed-off-by: Haishuang Yan 
---
 drivers/net/veth.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index f37a6e6..8bb9fb8 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -472,7 +472,6 @@ err_register_dev:
/* nothing to do */
 err_configure_peer:
unregister_netdevice(peer);
-   return err;
 
 err_register_peer:
free_netdev(peer);
-- 
1.8.3.1

[PATCH] netlink: use nla_get_in_addr and nla_put_in_addr for ipv4 address

2016-03-31 Thread Haishuang Yan

nla_get_in_addr and nla_put_in_addr have been implemented for IPv4
addresses, so use them where appropriate.

Signed-off-by: Haishuang Yan 
---
 net/ipv4/ip_tunnel_core.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 02dd990..47ea85d 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -247,10 +247,10 @@ static int ip_tun_build_state(struct net_device *dev, 
struct nlattr *attr,
tun_info->key.tun_id = nla_get_be64(tb[LWTUNNEL_IP_ID]);
 
if (tb[LWTUNNEL_IP_DST])
-   tun_info->key.u.ipv4.dst = nla_get_be32(tb[LWTUNNEL_IP_DST]);
+   tun_info->key.u.ipv4.dst = nla_get_in_addr(tb[LWTUNNEL_IP_DST]);
 
if (tb[LWTUNNEL_IP_SRC])
-   tun_info->key.u.ipv4.src = nla_get_be32(tb[LWTUNNEL_IP_SRC]);
+   tun_info->key.u.ipv4.src = nla_get_in_addr(tb[LWTUNNEL_IP_SRC]);
 
if (tb[LWTUNNEL_IP_TTL])
tun_info->key.ttl = nla_get_u8(tb[LWTUNNEL_IP_TTL]);
@@ -275,8 +275,8 @@ static int ip_tun_fill_encap_info(struct sk_buff *skb,
struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate);
 
if (nla_put_be64(skb, LWTUNNEL_IP_ID, tun_info->key.tun_id) ||
-   nla_put_be32(skb, LWTUNNEL_IP_DST, tun_info->key.u.ipv4.dst) ||
-   nla_put_be32(skb, LWTUNNEL_IP_SRC, tun_info->key.u.ipv4.src) ||
+   nla_put_in_addr(skb, LWTUNNEL_IP_DST, tun_info->key.u.ipv4.dst) ||
+   nla_put_in_addr(skb, LWTUNNEL_IP_SRC, tun_info->key.u.ipv4.src) ||
nla_put_u8(skb, LWTUNNEL_IP_TOS, tun_info->key.tos) ||
nla_put_u8(skb, LWTUNNEL_IP_TTL, tun_info->key.ttl) ||
nla_put_be16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags))
-- 
1.8.3.1

[PATCH] netfilter: unnecessary to check whether ip6_route_output() returns NULL

2016-04-03 Thread Haishuang Yan

ip6_route_output() never returns NULL, so it is not appropriate to
check if the return value is NULL.

Signed-off-by: Haishuang Yan 
---
 net/ipv6/netfilter/nf_reject_ipv6.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c 
b/net/ipv6/netfilter/nf_reject_ipv6.c
index 4709f65..a540022 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -158,7 +158,7 @@ void nf_send_reset6(struct net *net, struct sk_buff 
*oldskb, int hook)
fl6.fl6_dport = otcph->source;
security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6));
dst = ip6_route_output(net, NULL, &fl6);
-   if (dst == NULL || dst->error) {
+   if (dst->error) {
dst_release(dst);
return;
}
-- 
1.8.3.1

[PATCH 2/2] ipv6: l2tp: fix a potential issue in l2tp_ip6_recv

2016-04-03 Thread Haishuang Yan

pskb_may_pull() can change skb->data, so we have to load ptr/optr at the
right place.

Signed-off-by: Haishuang Yan 
---
 net/l2tp/l2tp_ip6.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 6b54ff3..cd47990 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -136,12 +136,11 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
struct l2tp_tunnel *tunnel = NULL;
int length;
 
-   /* Point to L2TP header */
-   optr = ptr = skb->data;
-
if (!pskb_may_pull(skb, 4))
goto discard;
 
+   /* Point to L2TP header */
+   optr = ptr = skb->data;
session_id = ntohl(*((__be32 *) ptr));
ptr += 4;
 
@@ -169,6 +168,9 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
if (!pskb_may_pull(skb, length))
goto discard;
 
+   /* Point to L2TP header */
+   optr = ptr = skb->data;
+   ptr += 4;
pr_debug("%s: ip recv\n", tunnel->name);
print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
}
-- 
1.8.3.1

[PATCH 1/2] ipv4: l2tp: fix a potential issue in l2tp_ip_recv

2016-04-03 Thread Haishuang Yan

pskb_may_pull() can change skb->data, so we have to load ptr/optr at the
right place.

Signed-off-by: Haishuang Yan 
---
 net/l2tp/l2tp_ip.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index ec22078..42de4cc 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -123,12 +123,11 @@ static int l2tp_ip_recv(struct sk_buff *skb)
struct l2tp_tunnel *tunnel = NULL;
int length;
 
-   /* Point to L2TP header */
-   optr = ptr = skb->data;
-
if (!pskb_may_pull(skb, 4))
goto discard;
 
+   /* Point to L2TP header */
+   optr = ptr = skb->data;
session_id = ntohl(*((__be32 *) ptr));
ptr += 4;
 
@@ -156,6 +155,9 @@ static int l2tp_ip_recv(struct sk_buff *skb)
if (!pskb_may_pull(skb, length))
goto discard;
 
+   /* Point to L2TP header */
+   optr = ptr = skb->data;
+   ptr += 4;
pr_debug("%s: ip recv\n", tunnel->name);
print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
}
-- 
1.8.3.1

[PATCH v3 1/3] selftests: netfilter: add ipvs test script

2019-10-01 Thread Haishuang Yan

Test virtual server via direct routing for IPv4.

Tested:

# selftests: netfilter: ipvs.sh
# Testing DR mode...
# ipvs.sh: PASS
ok 6 selftests: netfilter: ipvs.sh

Signed-off-by: Haishuang Yan 
---
v3: use bash style
v2: optimize test script
---
 tools/testing/selftests/netfilter/Makefile |   2 +-
 tools/testing/selftests/netfilter/ipvs.sh  | 184 +
 2 files changed, 185 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/netfilter/ipvs.sh

diff --git a/tools/testing/selftests/netfilter/Makefile 
b/tools/testing/selftests/netfilter/Makefile
index 4144984..de1032b 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -2,6 +2,6 @@
 # Makefile for netfilter selftests
 
 TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
-   conntrack_icmp_related.sh nft_flowtable.sh
+   conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh
 
 include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/ipvs.sh 
b/tools/testing/selftests/netfilter/ipvs.sh
new file mode 100755
index 000..6201046
--- /dev/null
+++ b/tools/testing/selftests/netfilter/ipvs.sh
@@ -0,0 +1,184 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# End-to-end ipvs test suite
+# Topology:
+#--+
+#  |   |
+# ns0  | ns1   |
+#  --- | ------|
+#  | veth01  | - | veth10  || veth12  ||
+#  ---peer   ------|
+#   |  ||  |
+#  --- ||  |
+#  |  br0| |-  peer |--|
+#  --- ||  |
+#   |  ||  |
+#  -- peer   --  ---   |
+#  |  veth02 | - |  veth20 | | veth21  |   |
+#  --  | --  ---   |
+#  | ns2   |
+#  |   |
+#--+
+#
+# We assume that all network driver are loaded
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+GREEN='\033[0;92m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+readonly port=8080
+
+readonly vip_v4=207.175.44.110
+readonly cip_v4=10.0.0.2
+readonly gip_v4=10.0.0.1
+readonly dip_v4=172.16.0.1
+readonly rip_v4=172.16.0.2
+readonly sip_v4=10.0.0.3
+
+readonly infile="$(mktemp)"
+readonly outfile="$(mktemp)"
+
+sysipvsnet=/proc/sys/net/ipv4/vs/
+if [ ! -d /proc/sys/net/ipv4/vs/ ]; then
+modprobe -q ip_vs
+if [ $? -ne 0 ]; then
+echo "SKIP: Could not run test without ipvs module"
+   exit $ksft_skip
+fi
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+   echo "SKIP: Could not run test without ip tool"
+   exit $ksft_skip
+fi
+
+ipvsadm -v > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+   echo "SKIP: Could not run test without ipvsadm"
+   exit $ksft_skip
+fi
+
+nc --version > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+   echo "SKIP: Could not run test without ncat"
+   exit $ksft_skip
+fi
+
+setup() {
+ip netns add ns0
+ip netns add ns1
+ip netns add ns2
+
+ip link add veth01 netns ns0 type veth peer name veth10 netns ns1
+ip link add veth02 netns ns0 type veth peer name veth20 netns ns2
+ip link add veth12 netns ns1 type veth peer name veth21 netns ns2
+
+ip netns exec ns0 ip link set veth01 up
+ip netns exec ns0 ip link set veth02 up
+ip netns exec ns0 ip link add br0 type bridge
+ip netns exec ns0 ip link set veth01 master br0
+ip netns exec ns0 ip link set veth02 master br0
+ip netns exec ns0 ip link set br0 up
+ip netns exec ns0 ip addr add ${cip_v4}/24 dev br0
+
+ip netns exec ns1 ip link set lo up
+ip netns exec ns1 ip link set veth10 up
+ip netns exec ns1 ip addr add ${gip_v4}/24 dev veth10
+ip netns exec ns1 ip link set veth12 up
+ip netns exec ns1 ip addr add ${dip_v4}/24 dev veth12
+
+ip netns exec ns2 ip link set lo up
+ip netns exec ns2 ip link set veth21 up
+ip netns exec ns2 ip addr add ${rip_v4}/24 dev veth21
+ip netns exec ns2 ip link set veth20 up
+ip netns exec ns2 ip addr add ${sip_v4}/24 dev veth20
+}
+
+cleanup() {
+for i in 0 1 2
+do
+   ip netns del ns$i > /dev/null 2>&1
+done
+pkill nc
+}
+
+server_listen() {
+   ip netns exec ns2 nc -l -p 8080 > "${outfile}" &
+   ser

[PATCH v3 2/3] selftests: netfilter: add ipvs nat test case

2019-10-01 Thread Haishuang Yan

Test virtual server via NAT.

Tested:
# selftests: netfilter: ipvs.sh
# Testing DR mode...
# Testing NAT mode...
# ipvs.sh: PASS

Signed-off-by: Haishuang Yan 
---
 tools/testing/selftests/netfilter/ipvs.sh | 22 +-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/netfilter/ipvs.sh 
b/tools/testing/selftests/netfilter/ipvs.sh
index 6201046..270b5da 100755
--- a/tools/testing/selftests/netfilter/ipvs.sh
+++ b/tools/testing/selftests/netfilter/ipvs.sh
@@ -160,20 +160,40 @@ test_dr() {
 test_service
 }
 
+test_nat() {
+ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0
+
+ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1
+ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr
+ip netns exec ns1 ipvsadm -a -m -t ${vip_v4}:${port} -r ${rip_v4}:${port}
+ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1
+
+ip netns exec ns2 ip link del veth20
+ip netns exec ns2 ip route add default via ${dip_v4} dev veth21
+
+test_service
+}
+
 run_tests() {
local errors=
 
echo "Testing DR mode..."
+   cleanup
setup
test_dr
errors=$(( $errors + $? ))
 
+   echo "Testing NAT mode..."
+   cleanup
+   setup
+   test_nat
+   errors=$(( $errors + $? ))
+
return $errors
 }
 
 trap cleanup EXIT
 
-cleanup
 run_tests
 
 if [ $? -ne 0 ]; then
-- 
1.8.3.1

[PATCH v3 0/3] selftests: netfilter: introduce test cases for ipvs

2019-10-01 Thread Haishuang Yan

This patch series includes test cases for ipvs.

The test topology is shown below:
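+--------------------------------------------------------------+
|                      |                                       |
|         ns0          |         ns1                           |
|      -----------     |     -----------    -----------        |
|      | veth01  | --------- | veth10  |    | veth12  |        |
|      -----------    peer   -----------    -----------        |
|           |          |                        |              |
|      -----------     |                        |              |
|      |  br0    |     |-----------------  peer  --------------|
|      -----------     |                        |              |
|           |          |                        |              |
|      -----------    peer   -----------     -----------       |
|      |  veth02 | --------- |  veth20 |     | veth21  |       |
|      -----------     |     -----------     -----------       |
|                      |         ns2                           |
|                      |                                       |
+--------------------------------------------------------------+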

Test results:
# selftests: netfilter: ipvs.sh
# Testing DR mode...
# Testing NAT mode...
# Testing Tunnel mode...
# ipvs.sh: PASS
ok 6 selftests: netfilter: ipvs.sh

Haishuang Yan (3):
  selftests: netfilter: add ipvs test script
  selftests: netfilter: add ipvs nat test case
  selftests: netfilter: add ipvs tunnel test case

 tools/testing/selftests/netfilter/Makefile |   2 +-
 tools/testing/selftests/netfilter/ipvs.sh  | 234 +
 2 files changed, 235 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/netfilter/ipvs.sh

-- 
1.8.3.1

[PATCH v3 3/3] selftests: netfilter: add ipvs tunnel test case

2019-10-01 Thread Haishuang Yan

Test virtual server via ipip tunnel.

Tested:
# selftests: netfilter: ipvs.sh
# Testing DR mode...
# Testing NAT mode...
# Testing Tunnel mode...
# ipvs.sh: PASS
ok 6 selftests: netfilter: ipvs.sh

Signed-off-by: Haishuang Yan 
---
v2: optimize test script
---
 tools/testing/selftests/netfilter/ipvs.sh | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/tools/testing/selftests/netfilter/ipvs.sh 
b/tools/testing/selftests/netfilter/ipvs.sh
index 270b5da..a8f6e70 100755
--- a/tools/testing/selftests/netfilter/ipvs.sh
+++ b/tools/testing/selftests/netfilter/ipvs.sh
@@ -174,6 +174,30 @@ test_nat() {
 test_service
 }
 
+test_tun() {
+ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0
+
+ip netns exec ns1 modprobe ipip
+ip netns exec ns1 ip link set tunl0 up
+ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=0
+ip netns exec ns1 sysctl -qw net.ipv4.conf.all.send_redirects=0
+ip netns exec ns1 sysctl -qw net.ipv4.conf.default.send_redirects=0
+ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr
+ip netns exec ns1 ipvsadm -a -i -t ${vip_v4}:${port} -r ${rip_v4}:${port}
+ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1
+
+ip netns exec ns2 modprobe ipip
+ip netns exec ns2 ip link set tunl0 up
+ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_ignore=1
+ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_announce=2
+ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0
+ip netns exec ns2 sysctl -qw net.ipv4.conf.tunl0.rp_filter=0
+ip netns exec ns2 sysctl -qw net.ipv4.conf.veth21.rp_filter=0
+ip netns exec ns2 ip addr add ${vip_v4}/32 dev lo:1
+
+test_service
+}
+
 run_tests() {
local errors=
 
@@ -189,6 +213,12 @@ run_tests() {
test_nat
errors=$(( $errors + $? ))
 
+   echo "Testing Tunnel mode..."
+   cleanup
+   setup
+   test_tun
+   errors=$(( $errors + $? ))
+
return $errors
 }
 
-- 
1.8.3.1

[PATCH] ip6erspan: remove the incorrect mtu limit for ip6erspan

2019-10-08 Thread Haishuang Yan

The ip6erspan driver calls ether_setup(). After commit 61e84623ace3
("net: centralize net_device min/max MTU checking"), the range
of mtu is [min_mtu, max_mtu], which is [68, 1500] by default.

As a result, the mtu of the erspan device cannot be set greater
than 1500; this limit is not correct for the ip6erspan tap
device.

Signed-off-by: Haishuang Yan 
---
 net/ipv6/ip6_gre.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index d5779d6..787d9f2 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -2192,6 +2192,7 @@ static void ip6erspan_tap_setup(struct net_device *dev)
 {
ether_setup(dev);
 
+   dev->max_mtu = 0;
dev->netdev_ops = &ip6erspan_netdev_ops;
dev->needs_free_netdev = true;
dev->priv_destructor = ip6gre_dev_free;
-- 
1.8.3.1

[PATCH v5 3/3] selftests: netfilter: add ipvs tunnel test case

2019-10-09 Thread Haishuang Yan

Test virtual server via ipip tunnel.

Tested:
# selftests: netfilter: ipvs.sh
# Testing DR mode...
# Testing NAT mode...
# Testing Tunnel mode...
# ipvs.sh: PASS
ok 6 selftests: netfilter: ipvs.sh

Signed-off-by: Haishuang Yan 
---
v2: optimize test script
---
 tools/testing/selftests/netfilter/ipvs.sh | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/tools/testing/selftests/netfilter/ipvs.sh 
b/tools/testing/selftests/netfilter/ipvs.sh
index 60250f7..edea729 100755
--- a/tools/testing/selftests/netfilter/ipvs.sh
+++ b/tools/testing/selftests/netfilter/ipvs.sh
@@ -168,6 +168,30 @@ test_nat() {
test_service
 }
 
+test_tun() {
+   ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0
+
+   ip netns exec ns1 modprobe ipip
+   ip netns exec ns1 ip link set tunl0 up
+   ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=0
+   ip netns exec ns1 sysctl -qw net.ipv4.conf.all.send_redirects=0
+   ip netns exec ns1 sysctl -qw net.ipv4.conf.default.send_redirects=0
+   ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr
+   ip netns exec ns1 ipvsadm -a -i -t ${vip_v4}:${port} -r 
${rip_v4}:${port}
+   ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1
+
+   ip netns exec ns2 modprobe ipip
+   ip netns exec ns2 ip link set tunl0 up
+   ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_ignore=1
+   ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_announce=2
+   ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0
+   ip netns exec ns2 sysctl -qw net.ipv4.conf.tunl0.rp_filter=0
+   ip netns exec ns2 sysctl -qw net.ipv4.conf.veth21.rp_filter=0
+   ip netns exec ns2 ip addr add ${vip_v4}/32 dev lo:1
+
+   test_service
+}
+
 run_tests() {
local errors=
 
@@ -183,6 +207,12 @@ run_tests() {
test_nat
errors=$(( $errors + $? ))
 
+   echo "Testing Tunnel mode..."
+   cleanup
+   setup
+   test_tun
+   errors=$(( $errors + $? ))
+
return $errors
 }
 
-- 
1.8.3.1

[PATCH v5 2/3] selftests: netfilter: add ipvs nat test case

2019-10-09 Thread Haishuang Yan

Test virtual server via NAT.

Tested:
# selftests: netfilter: ipvs.sh
# Testing DR mode...
# Testing NAT mode...
# ipvs.sh: PASS

Signed-off-by: Haishuang Yan 
---
 tools/testing/selftests/netfilter/ipvs.sh | 22 +-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/netfilter/ipvs.sh 
b/tools/testing/selftests/netfilter/ipvs.sh
index f844c0a..60250f7 100755
--- a/tools/testing/selftests/netfilter/ipvs.sh
+++ b/tools/testing/selftests/netfilter/ipvs.sh
@@ -154,20 +154,40 @@ test_dr() {
test_service
 }
 
+test_nat() {
+   ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0
+
+   ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1
+   ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr
+   ip netns exec ns1 ipvsadm -a -m -t ${vip_v4}:${port} -r 
${rip_v4}:${port}
+   ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1
+
+   ip netns exec ns2 ip link del veth20
+   ip netns exec ns2 ip route add default via ${dip_v4} dev veth21
+
+   test_service
+}
+
 run_tests() {
local errors=
 
echo "Testing DR mode..."
+   cleanup
setup
test_dr
errors=$(( $errors + $? ))
 
+   echo "Testing NAT mode..."
+   cleanup
+   setup
+   test_nat
+   errors=$(( $errors + $? ))
+
return $errors
 }
 
 trap cleanup EXIT
 
-cleanup
 run_tests
 
 if [ $? -ne 0 ]; then
-- 
1.8.3.1

[PATCH v5 1/3] selftests: netfilter: add ipvs test script

2019-10-09 Thread Haishuang Yan

Test virtual server via direct routing for IPv4.

Tested:

# selftests: netfilter: ipvs.sh
# Testing DR mode...
# ipvs.sh: PASS
ok 6 selftests: netfilter: ipvs.sh

Signed-off-by: Haishuang Yan 
---
v5: use cmp to compare two file contents suggested by Simon Horman
v4: use #!/bin/bash -p suggested by Duncan Roe
v3: use bash style
v2: optimize test script
---
 tools/testing/selftests/netfilter/Makefile |   2 +-
 tools/testing/selftests/netfilter/ipvs.sh  | 178 +
 2 files changed, 179 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/netfilter/ipvs.sh

diff --git a/tools/testing/selftests/netfilter/Makefile 
b/tools/testing/selftests/netfilter/Makefile
index 4144984..de1032b 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -2,6 +2,6 @@
 # Makefile for netfilter selftests
 
 TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
-   conntrack_icmp_related.sh nft_flowtable.sh
+   conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh
 
 include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/ipvs.sh 
b/tools/testing/selftests/netfilter/ipvs.sh
new file mode 100755
index 000..f844c0a
--- /dev/null
+++ b/tools/testing/selftests/netfilter/ipvs.sh
@@ -0,0 +1,178 @@
+#!/bin/bash -p
+# SPDX-License-Identifier: GPL-2.0
+#
+# End-to-end ipvs test suite
+# Topology:
+#--+
+#  |   |
+# ns0  | ns1   |
+#  --- | ------|
+#  | veth01  | - | veth10  || veth12  ||
+#  ---peer   ------|
+#   |  ||  |
+#  --- ||  |
+#  |  br0| |-  peer |--|
+#  --- ||  |
+#   |  ||  |
+#  -- peer   --  ---   |
+#  |  veth02 | - |  veth20 | | veth21  |   |
+#  --  | --  ---   |
+#  | ns2   |
+#  |   |
+#--+
+#
+# We assume that all network driver are loaded
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+GREEN='\033[0;92m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+readonly port=8080
+
+readonly vip_v4=207.175.44.110
+readonly cip_v4=10.0.0.2
+readonly gip_v4=10.0.0.1
+readonly dip_v4=172.16.0.1
+readonly rip_v4=172.16.0.2
+readonly sip_v4=10.0.0.3
+
+readonly infile="$(mktemp)"
+readonly outfile="$(mktemp)"
+readonly datalen=32
+
+sysipvsnet="/proc/sys/net/ipv4/vs/"
+if [ ! -d $sysipvsnet ]; then
+   modprobe -q ip_vs
+   if [ $? -ne 0 ]; then
+   echo "skip: could not run test without ipvs module"
+   exit $ksft_skip
+   fi
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+   echo "SKIP: Could not run test without ip tool"
+   exit $ksft_skip
+fi
+
+ipvsadm -v > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+   echo "SKIP: Could not run test without ipvsadm"
+   exit $ksft_skip
+fi
+
+setup() {
+   ip netns add ns0
+   ip netns add ns1
+   ip netns add ns2
+
+   ip link add veth01 netns ns0 type veth peer name veth10 netns ns1
+   ip link add veth02 netns ns0 type veth peer name veth20 netns ns2
+   ip link add veth12 netns ns1 type veth peer name veth21 netns ns2
+
+   ip netns exec ns0 ip link set veth01 up
+   ip netns exec ns0 ip link set veth02 up
+   ip netns exec ns0 ip link add br0 type bridge
+   ip netns exec ns0 ip link set veth01 master br0
+   ip netns exec ns0 ip link set veth02 master br0
+   ip netns exec ns0 ip link set br0 up
+   ip netns exec ns0 ip addr add ${cip_v4}/24 dev br0
+
+   ip netns exec ns1 ip link set lo up
+   ip netns exec ns1 ip link set veth10 up
+   ip netns exec ns1 ip addr add ${gip_v4}/24 dev veth10
+   ip netns exec ns1 ip link set veth12 up
+   ip netns exec ns1 ip addr add ${dip_v4}/24 dev veth12
+
+   ip netns exec ns2 ip link set lo up
+   ip netns exec ns2 ip link set veth21 up
+   ip netns exec ns2 ip addr add ${rip_v4}/24 dev veth21
+   ip netns exec ns2 ip link set veth20 up
+   ip netns exec ns2 ip addr add ${sip_v4}/24 dev veth20
+
+   sleep 1
+
+   dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none
+}
+
+cleanup() {
+

[PATCH v5 0/3] selftests: netfilter: introduce test cases for ipvs

2019-10-09 Thread Haishuang Yan

This patch series includes test cases for ipvs.

The test topology is shown below:
+--+
|  |   |
| ns0  | ns1   |
|  --- | ------|
|  | veth01  | - | veth10  || veth12  ||
|  ---peer   ------|
|   |  ||  |
|  --- ||  |
|  |  br0| |-  peer |--|
|  --- ||  |
|   |  ||  |
|  -- peer   --  ---   |
|  |  veth02 | - |  veth20 | | veth21  |   |
|  --  | --  ---   |
|  | ns2   |
|  |   |
+--+

Test results:
# selftests: netfilter: ipvs.sh
# Testing DR mode...
# Testing NAT mode...
# Testing Tunnel mode...
# ipvs.sh: PASS
ok 6 selftests: netfilter: ipvs.sh

Signed-off-by: Haishuang Yan 

Haishuang Yan (3):
  selftests: netfilter: add ipvs test script
  selftests: netfilter: add ipvs nat test case
  selftests: netfilter: add ipvs tunnel test case

 tools/testing/selftests/netfilter/Makefile |   2 +-
 tools/testing/selftests/netfilter/ipvs.sh  | 228 +
 2 files changed, 229 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/netfilter/ipvs.sh

-- 
1.8.3.1

Re: [PATCH v3 0/3] selftests: netfilter: introduce test cases for ipvs

2019-10-04 Thread Haishuang Yan



> On 2019年10月4日, at 下午7:47, Duncan Roe  wrote:
> 
> On Thu, Oct 03, 2019 at 10:41:06PM +0800, Haishuang Yan wrote:
>> 
>> 
>>> On 2019年10月2日, at 9:27, Duncan Roe wrote:
>>> 
>>> On Tue, Oct 01, 2019 at 09:34:13PM +0300, Julian Anastasov wrote:
>>>> 
>>>>Hello,
>>>> 
>>>> On Tue, 1 Oct 2019, Haishuang Yan wrote:
>>>> 
>>>>> This series patch include test cases for ipvs.
>>>>> 
>>>>> The test topology is who as below:
>>>>> +--+
>>>>> |  |   |
>>>>> | ns0  | ns1   |
>>>>> |  --- | ------|
>>>>> |  | veth01  | - | veth10  || veth12  ||
>>>>> |  ---peer   ------|
>>>>> |   |  ||  |
>>>>> |  --- ||  |
>>>>> |  |  br0| |-  peer |--|
>>>>> |  --- ||  |
>>>>> |   |  ||  |
>>>>> |  -- peer   --  ---   |
>>>>> |  |  veth02 | - |  veth20 | | veth12  |   |
>>>>> |  --  | --  ---   |
>>>>> |  | ns2           |
>>>>> |  |   |
>>>>> +--+
>>>>> 
>>>>> Test results:
>>>>> # selftests: netfilter: ipvs.sh
>>>>> # Testing DR mode...
>>>>> # Testing NAT mode...
>>>>> # Testing Tunnel mode...
>>>>> # ipvs.sh: PASS
>>>>> ok 6 selftests: netfilter: ipvs.sh
>>>>> 
>>>>> Haishuang Yan (3):
>>>>> selftests: netfilter: add ipvs test script
>>>>> selftests: netfilter: add ipvs nat test case
>>>>> selftests: netfilter: add ipvs tunnel test case
>>>> 
>>>> Acked-by: Julian Anastasov 
>>>> 
>>>>> tools/testing/selftests/netfilter/Makefile |   2 +-
>>>>> tools/testing/selftests/netfilter/ipvs.sh  | 234 
>>>>> +
>>>>> 2 files changed, 235 insertions(+), 1 deletion(-)
>>>>> create mode 100755 tools/testing/selftests/netfilter/ipvs.sh
>>>> 
>>>> Regards
>>>> 
>>>> --
>>>> Julian Anastasov 
>>> 
>>> I still prefer #!/bin/sh in 1/3. You never know what's in someone's 
>>> environment
>>> 
>>> Cheers ... Duncan.
>>> 
>> 
>> It’s also my preference too. "_"
>> 
>> I have tested both #!/bin/bash and #!/bin/sh script, they all works properly.
> 
> Enter these 2 lines:
>> ip(){ return 0; }
>> export -f ip
> 
> Now try the #!/bin/bash script. If that now fails, try again with #!/bin/bash
> changed to #!/bin/bash -p
> 
> Any better now?
> 
> Cheers ... Duncan.
> 
It’s better now, thanks for your explanation.
In the v3 commit I will use #!/bin/bash -p to prevent shell functions from
being imported from environment variables.

[PATCH v4 0/3] selftests: netfilter: introduce test cases for ipvs

2019-10-05 Thread Haishuang Yan

This patch series includes test cases for ipvs.

The test topology is shown below:
+--+
|  |   |
| ns0  | ns1   |
|  --- | ------|
|  | veth01  | - | veth10  || veth12  ||
|  ---peer   ------|
|   |  ||  |
|  --- ||  |
|  |  br0| |-  peer |--|
|  --- ||  |
|   |  ||  |
|  -- peer   --  ---   |
|  |  veth02 | - |  veth20 | | veth21  |   |
|  --  | --  ---   |
|  | ns2   |
|  |   |
+--+

Test results:
# selftests: netfilter: ipvs.sh
# Testing DR mode...
# Testing NAT mode...
# Testing Tunnel mode...
# ipvs.sh: PASS
ok 6 selftests: netfilter: ipvs.sh

Haishuang Yan (3):
  selftests: netfilter: add ipvs test script
  selftests: netfilter: add ipvs nat test case
  selftests: netfilter: add ipvs tunnel test case

 tools/testing/selftests/netfilter/Makefile |   2 +-
 tools/testing/selftests/netfilter/ipvs.sh  | 234 +
 2 files changed, 235 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/netfilter/ipvs.sh

-- 
1.8.3.1

[PATCH v4 1/3] selftests: netfilter: add ipvs test script

2019-10-05 Thread Haishuang Yan

Test virtual server via direct routing for IPv4.

Tested:

# selftests: netfilter: ipvs.sh
# Testing DR mode...
# ipvs.sh: PASS
ok 6 selftests: netfilter: ipvs.sh

Signed-off-by: Haishuang Yan 
---
v4: use #!/bin/bash -p suggested by Duncan Roe
v3: use bash style
v2: optimize test script
---
 tools/testing/selftests/netfilter/Makefile |   2 +-
 tools/testing/selftests/netfilter/ipvs.sh  | 184 +
 2 files changed, 185 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/netfilter/ipvs.sh

diff --git a/tools/testing/selftests/netfilter/Makefile 
b/tools/testing/selftests/netfilter/Makefile
index 4144984..de1032b 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -2,6 +2,6 @@
 # Makefile for netfilter selftests
 
 TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
-   conntrack_icmp_related.sh nft_flowtable.sh
+   conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh
 
 include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/ipvs.sh 
b/tools/testing/selftests/netfilter/ipvs.sh
new file mode 100755
index 000..f6da1bd
--- /dev/null
+++ b/tools/testing/selftests/netfilter/ipvs.sh
@@ -0,0 +1,184 @@
+#!/bin/bash -p
+# SPDX-License-Identifier: GPL-2.0
+#
+# End-to-end ipvs test suite
+# Topology:
+#--+
+#  |   |
+# ns0  | ns1   |
+#  --- | ------|
+#  | veth01  | - | veth10  || veth12  ||
+#  ---peer   ------|
+#   |  ||  |
+#  --- ||  |
+#  |  br0| |-  peer |--|
+#  --- ||  |
+#   |  ||  |
+#  -- peer   --  ---   |
+#  |  veth02 | - |  veth20 | | veth21  |   |
+#  --  | --  ---   |
+#  | ns2   |
+#  |   |
+#--+
+#
+# We assume that all network driver are loaded
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+GREEN='\033[0;92m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+readonly port=8080
+
+readonly vip_v4=207.175.44.110
+readonly cip_v4=10.0.0.2
+readonly gip_v4=10.0.0.1
+readonly dip_v4=172.16.0.1
+readonly rip_v4=172.16.0.2
+readonly sip_v4=10.0.0.3
+
+readonly infile="$(mktemp)"
+readonly outfile="$(mktemp)"
+
+sysipvsnet=/proc/sys/net/ipv4/vs/
+if [ ! -d /proc/sys/net/ipv4/vs/ ]; then
+modprobe -q ip_vs
+if [ $? -ne 0 ]; then
+echo "SKIP: Could not run test without ipvs module"
+   exit $ksft_skip
+fi
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+   echo "SKIP: Could not run test without ip tool"
+   exit $ksft_skip
+fi
+
+ipvsadm -v > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+   echo "SKIP: Could not run test without ipvsadm"
+   exit $ksft_skip
+fi
+
+nc --version > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+   echo "SKIP: Could not run test without ncat"
+   exit $ksft_skip
+fi
+
+setup() {
+ip netns add ns0
+ip netns add ns1
+ip netns add ns2
+
+ip link add veth01 netns ns0 type veth peer name veth10 netns ns1
+ip link add veth02 netns ns0 type veth peer name veth20 netns ns2
+ip link add veth12 netns ns1 type veth peer name veth21 netns ns2
+
+ip netns exec ns0 ip link set veth01 up
+ip netns exec ns0 ip link set veth02 up
+ip netns exec ns0 ip link add br0 type bridge
+ip netns exec ns0 ip link set veth01 master br0
+ip netns exec ns0 ip link set veth02 master br0
+ip netns exec ns0 ip link set br0 up
+ip netns exec ns0 ip addr add ${cip_v4}/24 dev br0
+
+ip netns exec ns1 ip link set lo up
+ip netns exec ns1 ip link set veth10 up
+ip netns exec ns1 ip addr add ${gip_v4}/24 dev veth10
+ip netns exec ns1 ip link set veth12 up
+ip netns exec ns1 ip addr add ${dip_v4}/24 dev veth12
+
+ip netns exec ns2 ip link set lo up
+ip netns exec ns2 ip link set veth21 up
+ip netns exec ns2 ip addr add ${rip_v4}/24 dev veth21
+ip netns exec ns2 ip link set veth20 up
+ip netns exec ns2 ip addr add ${sip_v4}/24 dev veth20
+}
+
+cleanup() {
+for i in 0 1 2
+do
+   ip netns del ns$i > /dev/null 2>&1
+done
+pkill nc
+}
+
+server_listen() {
+   ip netns exec ns2 nc -l -p

[PATCH v4 3/3] selftests: netfilter: add ipvs tunnel test case

2019-10-05 Thread Haishuang Yan

Test virtual server via ipip tunnel.

Tested:
# selftests: netfilter: ipvs.sh
# Testing DR mode...
# Testing NAT mode...
# Testing Tunnel mode...
# ipvs.sh: PASS
ok 6 selftests: netfilter: ipvs.sh

Signed-off-by: Haishuang Yan 
---
v2: optimize test script
---
 tools/testing/selftests/netfilter/ipvs.sh | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/tools/testing/selftests/netfilter/ipvs.sh 
b/tools/testing/selftests/netfilter/ipvs.sh
index 2601a7c..48647ae 100755
--- a/tools/testing/selftests/netfilter/ipvs.sh
+++ b/tools/testing/selftests/netfilter/ipvs.sh
@@ -174,6 +174,30 @@ test_nat() {
 test_service
 }
 
+test_tun() {
+ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0
+
+ip netns exec ns1 modprobe ipip
+ip netns exec ns1 ip link set tunl0 up
+ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=0
+ip netns exec ns1 sysctl -qw net.ipv4.conf.all.send_redirects=0
+ip netns exec ns1 sysctl -qw net.ipv4.conf.default.send_redirects=0
+ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr
+ip netns exec ns1 ipvsadm -a -i -t ${vip_v4}:${port} -r ${rip_v4}:${port}
+ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1
+
+ip netns exec ns2 modprobe ipip
+ip netns exec ns2 ip link set tunl0 up
+ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_ignore=1
+ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_announce=2
+ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0
+ip netns exec ns2 sysctl -qw net.ipv4.conf.tunl0.rp_filter=0
+ip netns exec ns2 sysctl -qw net.ipv4.conf.veth21.rp_filter=0
+ip netns exec ns2 ip addr add ${vip_v4}/32 dev lo:1
+
+test_service
+}
+
 run_tests() {
local errors=
 
@@ -189,6 +213,12 @@ run_tests() {
test_nat
errors=$(( $errors + $? ))
 
+   echo "Testing Tunnel mode..."
+   cleanup
+   setup
+   test_tun
+   errors=$(( $errors + $? ))
+
return $errors
 }
 
-- 
1.8.3.1

[PATCH v4 2/3] selftests: netfilter: add ipvs nat test case

2019-10-05 Thread Haishuang Yan

Test virtual server via NAT.

Tested:
# selftests: netfilter: ipvs.sh
# Testing DR mode...
# Testing NAT mode...
# ipvs.sh: PASS

Signed-off-by: Haishuang Yan 
---
 tools/testing/selftests/netfilter/ipvs.sh | 22 +-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/netfilter/ipvs.sh 
b/tools/testing/selftests/netfilter/ipvs.sh
index f6da1bd..2601a7c 100755
--- a/tools/testing/selftests/netfilter/ipvs.sh
+++ b/tools/testing/selftests/netfilter/ipvs.sh
@@ -160,20 +160,40 @@ test_dr() {
 test_service
 }
 
+test_nat() {
+ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0
+
+ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1
+ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr
+ip netns exec ns1 ipvsadm -a -m -t ${vip_v4}:${port} -r ${rip_v4}:${port}
+ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1
+
+ip netns exec ns2 ip link del veth20
+ip netns exec ns2 ip route add default via ${dip_v4} dev veth21
+
+test_service
+}
+
 run_tests() {
local errors=
 
echo "Testing DR mode..."
+   cleanup
setup
test_dr
errors=$(( $errors + $? ))
 
+   echo "Testing NAT mode..."
+   cleanup
+   setup
+   test_nat
+   errors=$(( $errors + $? ))
+
return $errors
 }
 
 trap cleanup EXIT
 
-cleanup
 run_tests
 
 if [ $? -ne 0 ]; then
-- 
1.8.3.1

[PATCH 0/3] selftests: netfilter: introduce test cases for ipvs

2019-09-26 Thread Haishuang Yan

This patch series includes test cases for ipvs.

The test topology is shown below:
+--+
|  |   |
| ns0  | ns1   |
|  --- | ------|
|  | veth01  | - | veth10  || veth12  ||
|  ---peer   ------|
|   |  ||  |
|  --- ||  |
|  |  br0| |-  peer |--|
|  --- ||  |
|   |  ||  |
|  -- peer   --  ---   |
|  |  veth02 | - |  veth20 | | veth21  |   |
|  --  | --  ---   |
|  | ns2   |
|  |   |
+--+

Test results:
# selftests: netfilter: ipvs.sh
# Testing DR mode...
# Testing NAT mode...
# Testing Tunnel mode...
# ipvs.sh: PASS
ok 6 selftests: netfilter: ipvs.sh

Haishuang Yan (3):
  selftests: netfilter: add ipvs test script
  selftests: netfilter: add ipvs nat test case
  selftests: netfilter: add ipvs tunnel test case

 tools/testing/selftests/netfilter/Makefile |   2 +-
 tools/testing/selftests/netfilter/ipvs.sh  | 230 +
 2 files changed, 231 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/netfilter/ipvs.sh

-- 
1.8.3.1

[PATCH 1/3] selftests: netfilter: add ipvs test script

2019-09-26 Thread Haishuang Yan

Test virtual server via direct routing for IPv4.

Tested:

# selftests: netfilter: ipvs.sh
# Testing DR mode...
# ipvs.sh: PASS
ok 6 selftests: netfilter: ipvs.sh

Signed-off-by: Haishuang Yan 
---
 tools/testing/selftests/netfilter/Makefile |   2 +-
 tools/testing/selftests/netfilter/ipvs.sh  | 177 +
 2 files changed, 178 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/netfilter/ipvs.sh

diff --git a/tools/testing/selftests/netfilter/Makefile 
b/tools/testing/selftests/netfilter/Makefile
index 4144984..de1032b 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -2,6 +2,6 @@
 # Makefile for netfilter selftests
 
 TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
-   conntrack_icmp_related.sh nft_flowtable.sh
+   conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh
 
 include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/ipvs.sh 
b/tools/testing/selftests/netfilter/ipvs.sh
new file mode 100755
index 000..15c386b
--- /dev/null
+++ b/tools/testing/selftests/netfilter/ipvs.sh
@@ -0,0 +1,177 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# End-to-end ipvs test suite
+# Topology:
+#---
+#  |   |
+# ns0  | ns1   |
+#  --- | ------|
+#  | veth01  | - | veth10  || veth12  ||
+#  ---peer   ------|
+#   |  ||  |
+#  --- ||  |
+#  |  br0| |-  peer |--|
+#  --- ||  |
+#   |  ||  |
+#  -- peer   --  ---   |
+#  |  veth02 | - |  veth20 | | veth12  |   |
+#  --  | --  ---   |
+#  | ns2   |
+#  |   |
+#---
+#
+# We assume that all network driver are loaded
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+GREEN='\033[0;92m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+readonly port=8080
+
+readonly vip_v4=207.175.44.110
+readonly cip_v4=10.0.0.2
+readonly gip_v4=10.0.0.1
+readonly dip_v4=172.16.0.1
+readonly rip_v4=172.16.0.2
+readonly sip_v4=10.0.0.3
+
+readonly infile="$(mktemp)"
+readonly outfile="$(mktemp)"
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+   echo "SKIP: Could not run test without ip tool"
+   exit $ksft_skip
+fi
+
+ipvsadm -v > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+   echo "SKIP: Could not run test without ipvsadm"
+   exit $ksft_skip
+fi
+
+nc --version > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+   echo "SKIP: Could not run test without ncat"
+   exit $ksft_skip
+fi
+
+setup() {
+ip netns add ns0
+ip netns add ns1
+ip netns add ns2
+
+ip link add veth01 netns ns0 type veth peer name veth10 netns ns1
+ip link add veth02 netns ns0 type veth peer name veth20 netns ns2
+ip link add veth12 netns ns1 type veth peer name veth21 netns ns2
+
+ip netns exec ns0 ip link set veth01 up
+ip netns exec ns0 ip link set veth02 up
+ip netns exec ns0 ip link add br0 type bridge
+ip netns exec ns0 ip link set veth01 master br0
+ip netns exec ns0 ip link set veth02 master br0
+ip netns exec ns0 ip link set br0 up
+ip netns exec ns0 ip addr add ${cip_v4}/24 dev br0
+
+ip netns exec ns1 ip link set lo up
+ip netns exec ns1 ip link set veth10 up
+ip netns exec ns1 ip addr add ${gip_v4}/24 dev veth10
+ip netns exec ns1 ip link set veth12 up
+ip netns exec ns1 ip addr add ${dip_v4}/24 dev veth12
+
+ip netns exec ns2 ip link set lo up
+ip netns exec ns2 ip link set veth21 up
+ip netns exec ns2 ip addr add ${rip_v4}/24 dev veth21
+ip netns exec ns2 ip link set veth20 up
+ip netns exec ns2 ip addr add ${sip_v4}/24 dev veth20
+}
+
+cleanup() {
+for i in 0 1 2
+do
+   ip netns del ns$i > /dev/null 2>&1
+done
+pkill nc
+}
+
+server_listen() {
+   ip netns exec ns2 nc -l -p 8080 > "${outfile}" &
+   server_pid=$!
+   sleep 0.2
+}
+
+client_connect() {
+   ip netns exec ns0 timeout 2 nc -w 1 ${vip_v4} ${port} < "${infile}"
+}
+
+verify_data() {
+   wait "${server_pid}"
+   # sha1sum returns two fields [sha1] [filepath]
+   # convert to bas

[PATCH 3/3] selftests: netfilter: add ipvs tunnel test case

2019-09-26 Thread Haishuang Yan

Test virtual server via ipip tunnel.

Tested:
# selftests: netfilter: ipvs.sh
# Testing DR mode...
# Testing NAT mode...
# Testing Tunnel mode...
# ipvs.sh: PASS
ok 6 selftests: netfilter: ipvs.sh

Signed-off-by: Haishuang Yan 
---
 tools/testing/selftests/netfilter/ipvs.sh | 33 +++
 1 file changed, 33 insertions(+)

diff --git a/tools/testing/selftests/netfilter/ipvs.sh 
b/tools/testing/selftests/netfilter/ipvs.sh
index 40058f9..2012cec 100755
--- a/tools/testing/selftests/netfilter/ipvs.sh
+++ b/tools/testing/selftests/netfilter/ipvs.sh
@@ -167,6 +167,33 @@ test_nat() {
 test_service
 }
 
+test_tun() {
+ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0
+
+ip netns exec ns1 modprobe ipip
+ip netns exec ns1 ip link set tunl0 up
+ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=0
+ip netns exec ns1 sysctl -qw net.ipv4.conf.all.send_redirects=0
+ip netns exec ns1 sysctl -qw net.ipv4.conf.default.send_redirects=0
+ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr
+ip netns exec ns1 ipvsadm -a -i -t ${vip_v4}:${port} -r ${rip_v4}:${port}
+ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1
+
+ip netns exec ns2 modprobe ipip
+ip netns exec ns2 ip link set tunl0 up
+ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_ignore=1
+ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_announce=2
+ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0
+ip netns exec ns2 sysctl -qw net.ipv4.conf.lo.arp_ignore=1
+ip netns exec ns2 sysctl -qw net.ipv4.conf.lo.arp_announce=2
+ip netns exec ns2 sysctl -qw net.ipv4.conf.lo.rp_filter=0
+ip netns exec ns2 sysctl -qw net.ipv4.conf.tunl0.rp_filter=0
+ip netns exec ns2 sysctl -qw net.ipv4.conf.veth21.rp_filter=0
+ip netns exec ns2 ip addr add ${vip_v4}/32 dev lo:1
+
+test_service
+}
+
 run_tests() {
local errors=
 
@@ -182,6 +209,12 @@ run_tests() {
test_nat
errors=$(( $errors + $? ))
 
+   echo "Testing Tunnel mode..."
+   cleanup
+   setup
+   test_tun
+   errors=$(( $errors + $? ))
+
return $errors
 }
 
-- 
1.8.3.1

[PATCH 2/3] selftests: netfilter: add ipvs nat test case

2019-09-26 Thread Haishuang Yan

Test virtual server via NAT.

Tested:
# selftests: netfilter: ipvs.sh
# Testing DR mode...
# Testing NAT mode...
# ipvs.sh: PASS

Signed-off-by: Haishuang Yan 
---
 tools/testing/selftests/netfilter/ipvs.sh | 22 +-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/netfilter/ipvs.sh 
b/tools/testing/selftests/netfilter/ipvs.sh
index 15c386b..40058f9 100755
--- a/tools/testing/selftests/netfilter/ipvs.sh
+++ b/tools/testing/selftests/netfilter/ipvs.sh
@@ -153,20 +153,40 @@ test_dr() {
 test_service
 }
 
+test_nat() {
+ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0
+
+ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1
+ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr
+ip netns exec ns1 ipvsadm -a -m -t ${vip_v4}:${port} -r ${rip_v4}:${port}
+ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1
+
+ip netns exec ns2 ip link del veth20
+ip netns exec ns2 ip route add default via ${dip_v4} dev veth21
+
+test_service
+}
+
 run_tests() {
local errors=
 
echo "Testing DR mode..."
+   cleanup
setup
test_dr
errors=$(( $errors + $? ))
 
+   echo "Testing NAT mode..."
+   cleanup
+   setup
+   test_nat
+   errors=$(( $errors + $? ))
+
return $errors
 }
 
 trap cleanup EXIT
 
-cleanup
 run_tests
 
 if [ $? -ne 0 ]; then
-- 
1.8.3.1

[PATCH v2 0/2] ipvs: speedup ipvs netns dismantle

2019-09-26 Thread Haishuang Yan

Implement exit_batch() method to dismantle more ipvs netns
per round.

Tested:
$  cat add_del_unshare.sh
#!/bin/bash

for i in `seq 1 100`
do
 (for j in `seq 1 40` ; do  unshare -n ipvsadm -A -t 172.16.$i.$j:80 
>/dev/null ; done) &
done
wait; grep net_namespace /proc/slabinfo

Before patch:
$  time sh add_del_unshare.sh
net_namespace   4020   4020   473668 : tunables000 : 
slabdata670670  0

real0m8.086s
user0m2.025s
sys 0m36.956s

After patch:
$  time sh add_del_unshare.sh
net_namespace   4020   4020   473668 : tunables000 : 
slabdata670670  0

real0m7.623s
user0m2.003s
sys 0m32.935s

Haishuang Yan (2):
  ipvs: batch __ip_vs_cleanup
  ipvs: batch __ip_vs_dev_cleanup

 include/net/ip_vs.h |  2 +-
 net/netfilter/ipvs/ip_vs_core.c | 47 -
 net/netfilter/ipvs/ip_vs_ctl.c  | 12 ---
 3 files changed, 38 insertions(+), 23 deletions(-)

-- 
1.8.3.1

[PATCH v2 1/2] ipvs: batch __ip_vs_cleanup

2019-09-26 Thread Haishuang Yan

Batch __ip_vs_cleanup via the exit_batch() method to speed up the
dismantling of ipvs connections.

Signed-off-by: Haishuang Yan 
---
v2: remove unused pointer list
---
 include/net/ip_vs.h |  2 +-
 net/netfilter/ipvs/ip_vs_core.c | 28 
 net/netfilter/ipvs/ip_vs_ctl.c  | 12 +---
 3 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 3759167..93e7a25 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1324,7 +1324,7 @@ static inline void ip_vs_control_del(struct ip_vs_conn 
*cp)
 void ip_vs_control_net_cleanup(struct netns_ipvs *ipvs);
 void ip_vs_estimator_net_cleanup(struct netns_ipvs *ipvs);
 void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs);
-void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs);
+void ip_vs_service_nets_cleanup(struct list_head *net_list);
 
 /* IPVS application functions
  * (from ip_vs_app.c)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 8b80ab7..93cfb47 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -2402,18 +2402,22 @@ static int __net_init __ip_vs_init(struct net *net)
return -ENOMEM;
 }
 
-static void __net_exit __ip_vs_cleanup(struct net *net)
+static void __net_exit __ip_vs_cleanup_batch(struct list_head *net_list)
 {
-   struct netns_ipvs *ipvs = net_ipvs(net);
-
-   ip_vs_service_net_cleanup(ipvs);/* ip_vs_flush() with locks */
-   ip_vs_conn_net_cleanup(ipvs);
-   ip_vs_app_net_cleanup(ipvs);
-   ip_vs_protocol_net_cleanup(ipvs);
-   ip_vs_control_net_cleanup(ipvs);
-   ip_vs_estimator_net_cleanup(ipvs);
-   IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen);
-   net->ipvs = NULL;
+   struct netns_ipvs *ipvs;
+   struct net *net;
+
+   ip_vs_service_nets_cleanup(net_list);   /* ip_vs_flush() with locks */
+   list_for_each_entry(net, net_list, exit_list) {
+   ipvs = net_ipvs(net);
+   ip_vs_conn_net_cleanup(ipvs);
+   ip_vs_app_net_cleanup(ipvs);
+   ip_vs_protocol_net_cleanup(ipvs);
+   ip_vs_control_net_cleanup(ipvs);
+   ip_vs_estimator_net_cleanup(ipvs);
+   IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen);
+   net->ipvs = NULL;
+   }
 }
 
 static int __net_init __ip_vs_dev_init(struct net *net)
@@ -2442,7 +2446,7 @@ static void __net_exit __ip_vs_dev_cleanup(struct net 
*net)
 
 static struct pernet_operations ipvs_core_ops = {
.init = __ip_vs_init,
-   .exit = __ip_vs_cleanup,
+   .exit_batch = __ip_vs_cleanup_batch,
.id   = &ip_vs_net_id,
.size = sizeof(struct netns_ipvs),
 };
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 8b48e7c..153c77b 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1607,14 +1607,20 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool 
cleanup)
 
 /*
  * Delete service by {netns} in the service table.
- * Called by __ip_vs_cleanup()
+ * Called by __ip_vs_cleanup_batch()
  */
-void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs)
+void ip_vs_service_nets_cleanup(struct list_head *net_list)
 {
+   struct netns_ipvs *ipvs;
+   struct net *net;
+
EnterFunction(2);
/* Check for "full" addressed entries */
mutex_lock(&__ip_vs_mutex);
-   ip_vs_flush(ipvs, true);
+   list_for_each_entry(net, net_list, exit_list) {
+   ipvs = net_ipvs(net);
+   ip_vs_flush(ipvs, true);
+   }
mutex_unlock(&__ip_vs_mutex);
LeaveFunction(2);
 }
-- 
1.8.3.1

[PATCH v2 2/2] ipvs: batch __ip_vs_dev_cleanup

2019-09-26 Thread Haishuang Yan

Batch __ip_vs_dev_cleanup via the exit_batch() method to speed up the
dismantling of ipvs devices.

Signed-off-by: Haishuang Yan 
---
v2: remove unused pointer list
---
 net/netfilter/ipvs/ip_vs_core.c | 19 ---
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 93cfb47..512259f 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -2433,14 +2433,19 @@ static int __net_init __ip_vs_dev_init(struct net *net)
return ret;
 }
 
-static void __net_exit __ip_vs_dev_cleanup(struct net *net)
+static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list)
 {
-   struct netns_ipvs *ipvs = net_ipvs(net);
+   struct netns_ipvs *ipvs;
+   struct net *net;
+
EnterFunction(2);
-   nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
-   ipvs->enable = 0;   /* Disable packet reception */
-   smp_wmb();
-   ip_vs_sync_net_cleanup(ipvs);
+   list_for_each_entry(net, net_list, exit_list) {
+   ipvs = net_ipvs(net);
+   nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+   ipvs->enable = 0;   /* Disable packet reception */
+   smp_wmb();
+   ip_vs_sync_net_cleanup(ipvs);
+   }
LeaveFunction(2);
 }
 
@@ -2453,7 +2458,7 @@ static void __net_exit __ip_vs_dev_cleanup(struct net 
*net)
 
 static struct pernet_operations ipvs_core_dev_ops = {
.init = __ip_vs_dev_init,
-   .exit = __ip_vs_dev_cleanup,
+   .exit_batch = __ip_vs_dev_cleanup_batch,
 };
 
 /*
-- 
1.8.3.1

[PATCH v2 2/3] selftests: netfilter: add ipvs nat test case

2019-09-26 Thread Haishuang Yan

Test virtual server via NAT.

Tested:
# selftests: netfilter: ipvs.sh
# Testing DR mode...
# Testing NAT mode...
# ipvs.sh: PASS

Signed-off-by: Haishuang Yan 
---
 tools/testing/selftests/netfilter/ipvs.sh | 22 +-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/netfilter/ipvs.sh 
b/tools/testing/selftests/netfilter/ipvs.sh
index 658c06b..e95453b 100755
--- a/tools/testing/selftests/netfilter/ipvs.sh
+++ b/tools/testing/selftests/netfilter/ipvs.sh
@@ -160,20 +160,40 @@ test_dr() {
 test_service
 }
 
+test_nat() {
+ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0
+
+ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1
+ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr
+ip netns exec ns1 ipvsadm -a -m -t ${vip_v4}:${port} -r ${rip_v4}:${port}
+ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1
+
+ip netns exec ns2 ip link del veth20
+ip netns exec ns2 ip route add default via ${dip_v4} dev veth21
+
+test_service
+}
+
 run_tests() {
local errors=
 
echo "Testing DR mode..."
+   cleanup
setup
test_dr
errors=$(( $errors + $? ))
 
+   echo "Testing NAT mode..."
+   cleanup
+   setup
+   test_nat
+   errors=$(( $errors + $? ))
+
return $errors
 }
 
 trap cleanup EXIT
 
-cleanup
 run_tests
 
 if [ $? -ne 0 ]; then
-- 
1.8.3.1

[PATCH v2 0/3] selftests: netfilter: introduce test cases for ipvs

2019-09-26 Thread Haishuang Yan

This patch series includes test cases for ipvs.

The test topology is shown below:
+--+
|  |   |
| ns0  | ns1   |
|  --- | ------|
|  | veth01  | - | veth10  || veth12  ||
|  ---peer   ------|
|   |  ||  |
|  --- ||  |
|  |  br0| |-  peer |--|
|  --- ||  |
|   |  ||  |
|  -- peer   --  ---   |
|  |  veth02 | - |  veth20 | | veth21  |   |
|  --  | --  ---   |
|  | ns2   |
|  |   |
+--+

Test results:
# selftests: netfilter: ipvs.sh
# Testing DR mode...
# Testing NAT mode...
# Testing Tunnel mode...
# ipvs.sh: PASS
ok 6 selftests: netfilter: ipvs.sh

Haishuang Yan (3):
  selftests: netfilter: add ipvs test script
  selftests: netfilter: add ipvs nat test case
  selftests: netfilter: add ipvs tunnel test case

 tools/testing/selftests/netfilter/Makefile |   2 +-
 tools/testing/selftests/netfilter/ipvs.sh  | 234 +
 2 files changed, 235 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/netfilter/ipvs.sh

-- 
1.8.3.1

[PATCH v2 3/3] selftests: netfilter: add ipvs tunnel test case

2019-09-26 Thread Haishuang Yan

Test virtual server via ipip tunnel.

Tested:
# selftests: netfilter: ipvs.sh
# Testing DR mode...
# Testing NAT mode...
# Testing Tunnel mode...
# ipvs.sh: PASS
ok 6 selftests: netfilter: ipvs.sh

Signed-off-by: Haishuang Yan 
---
v2: optimize test script
---
 tools/testing/selftests/netfilter/ipvs.sh | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/tools/testing/selftests/netfilter/ipvs.sh 
b/tools/testing/selftests/netfilter/ipvs.sh
index e95453b..b09994e 100755
--- a/tools/testing/selftests/netfilter/ipvs.sh
+++ b/tools/testing/selftests/netfilter/ipvs.sh
@@ -174,6 +174,30 @@ test_nat() {
 test_service
 }
 
+test_tun() {
+ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0
+
+ip netns exec ns1 modprobe ipip
+ip netns exec ns1 ip link set tunl0 up
+ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=0
+ip netns exec ns1 sysctl -qw net.ipv4.conf.all.send_redirects=0
+ip netns exec ns1 sysctl -qw net.ipv4.conf.default.send_redirects=0
+ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr
+ip netns exec ns1 ipvsadm -a -i -t ${vip_v4}:${port} -r ${rip_v4}:${port}
+ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1
+
+ip netns exec ns2 modprobe ipip
+ip netns exec ns2 ip link set tunl0 up
+ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_ignore=1
+ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_announce=2
+ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0
+ip netns exec ns2 sysctl -qw net.ipv4.conf.tunl0.rp_filter=0
+ip netns exec ns2 sysctl -qw net.ipv4.conf.veth21.rp_filter=0
+ip netns exec ns2 ip addr add ${vip_v4}/32 dev lo:1
+
+test_service
+}
+
 run_tests() {
local errors=
 
@@ -189,6 +213,12 @@ run_tests() {
test_nat
errors=$(( $errors + $? ))
 
+   echo "Testing Tunnel mode..."
+   cleanup
+   setup
+   test_tun
+   errors=$(( $errors + $? ))
+
return $errors
 }
 
-- 
1.8.3.1

[PATCH v2 1/3] selftests: netfilter: add ipvs test script

2019-09-26 Thread Haishuang Yan

Test virtual server via direct routing for IPv4.

Tested:

# selftests: netfilter: ipvs.sh
# Testing DR mode...
# ipvs.sh: PASS
ok 6 selftests: netfilter: ipvs.sh

Signed-off-by: Haishuang Yan 
---
v2: optimize test script
---
 tools/testing/selftests/netfilter/Makefile |   2 +-
 tools/testing/selftests/netfilter/ipvs.sh  | 184 +
 2 files changed, 185 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/netfilter/ipvs.sh

diff --git a/tools/testing/selftests/netfilter/Makefile 
b/tools/testing/selftests/netfilter/Makefile
index 4144984..de1032b 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -2,6 +2,6 @@
 # Makefile for netfilter selftests
 
 TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
-   conntrack_icmp_related.sh nft_flowtable.sh
+   conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh
 
 include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/ipvs.sh 
b/tools/testing/selftests/netfilter/ipvs.sh
new file mode 100755
index 000..658c06b
--- /dev/null
+++ b/tools/testing/selftests/netfilter/ipvs.sh
@@ -0,0 +1,184 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# End-to-end ipvs test suite
+# Topology:
+#--+
+#  |   |
+# ns0  | ns1   |
+#  --- | ------|
+#  | veth01  | - | veth10  || veth12  ||
+#  ---peer   ------|
+#   |  ||  |
+#  --- ||  |
+#  |  br0| |-  peer |--|
+#  --- ||  |
+#   |  ||  |
+#  -- peer   --  ---   |
+#  |  veth02 | - |  veth20 | | veth21  |   |
+#  --  | --  ---   |
+#  | ns2   |
+#  |   |
+#--+
+#
+# We assume that all network driver are loaded
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+GREEN='\033[0;92m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+readonly port=8080
+
+readonly vip_v4=207.175.44.110
+readonly cip_v4=10.0.0.2
+readonly gip_v4=10.0.0.1
+readonly dip_v4=172.16.0.1
+readonly rip_v4=172.16.0.2
+readonly sip_v4=10.0.0.3
+
+readonly infile="$(mktemp)"
+readonly outfile="$(mktemp)"
+
+sysipvsnet=/proc/sys/net/ipv4/vs/
+if [ ! -d /proc/sys/net/ipv4/vs/ ]; then
+modprobe -q ip_vs
+if [ $? -ne 0 ]; then
+echo "SKIP: Could not run test without ipvs module"
+   exit $ksft_skip
+fi
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+   echo "SKIP: Could not run test without ip tool"
+   exit $ksft_skip
+fi
+
+ipvsadm -v > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+   echo "SKIP: Could not run test without ipvsadm"
+   exit $ksft_skip
+fi
+
+nc --version > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+   echo "SKIP: Could not run test without ncat"
+   exit $ksft_skip
+fi
+
+setup() {
+ip netns add ns0
+ip netns add ns1
+ip netns add ns2
+
+ip link add veth01 netns ns0 type veth peer name veth10 netns ns1
+ip link add veth02 netns ns0 type veth peer name veth20 netns ns2
+ip link add veth12 netns ns1 type veth peer name veth21 netns ns2
+
+ip netns exec ns0 ip link set veth01 up
+ip netns exec ns0 ip link set veth02 up
+ip netns exec ns0 ip link add br0 type bridge
+ip netns exec ns0 ip link set veth01 master br0
+ip netns exec ns0 ip link set veth02 master br0
+ip netns exec ns0 ip link set br0 up
+ip netns exec ns0 ip addr add ${cip_v4}/24 dev br0
+
+ip netns exec ns1 ip link set lo up
+ip netns exec ns1 ip link set veth10 up
+ip netns exec ns1 ip addr add ${gip_v4}/24 dev veth10
+ip netns exec ns1 ip link set veth12 up
+ip netns exec ns1 ip addr add ${dip_v4}/24 dev veth12
+
+ip netns exec ns2 ip link set lo up
+ip netns exec ns2 ip link set veth21 up
+ip netns exec ns2 ip addr add ${rip_v4}/24 dev veth21
+ip netns exec ns2 ip link set veth20 up
+ip netns exec ns2 ip addr add ${sip_v4}/24 dev veth20
+}
+
+cleanup() {
+for i in 0 1 2
+do
+   ip netns del ns$i > /dev/null 2>&1
+done
+pkill nc
+}
+
+server_listen() {
+   ip netns exec ns2 nc -l -p 8080 > "${outfile}" &
+   server_pid=$!
+

[PATCH] erspan: remove the incorrect mtu limit for erspan

2019-09-26 Thread Haishuang Yan

erspan driver calls ether_setup(), after commit 61e84623ace3
("net: centralize net_device min/max MTU checking"), the range
of mtu is [min_mtu, max_mtu], which is [68, 1500] by default.

It causes the dev mtu of the erspan device to not be greater
than 1500, this limit value is not correct for ipgre tap device.

Tested:
Before patch:
# ip link set erspan0 mtu 1600
Error: mtu greater than device maximum.
After patch:
# ip link set erspan0 mtu 1600
# ip -d link show erspan0
21: erspan0@NONE:  mtu 1600 qdisc noop state DOWN
mode DEFAULT group default qlen 1000
link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 
maxmtu 0

Fixes: 61e84623ace3 ("net: centralize net_device min/max MTU checking")
Signed-off-by: Haishuang Yan 
---
 net/ipv4/ip_gre.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index a53a543..52690bb 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1446,6 +1446,7 @@ static void erspan_setup(struct net_device *dev)
struct ip_tunnel *t = netdev_priv(dev);
 
ether_setup(dev);
+   dev->max_mtu = 0;
dev->netdev_ops = &erspan_netdev_ops;
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
-- 
1.8.3.1

Re: [net-next 1/2] ipvs: batch __ip_vs_cleanup

2019-07-18 Thread Haishuang Yan



> On 2019年7月16日, at 上午4:39, Julian Anastasov  wrote:
> 
> 
>   Hello,
> 
> On Sat, 13 Jul 2019, Haishuang Yan wrote:
> 
>> It's better to batch __ip_vs_cleanup to speedup ipvs
>> connections dismantle.
>> 
>> Signed-off-by: Haishuang Yan 
>> ---
>> include/net/ip_vs.h |  2 +-
>> net/netfilter/ipvs/ip_vs_core.c | 29 +
>> net/netfilter/ipvs/ip_vs_ctl.c  | 13 ++---
>> 3 files changed, 28 insertions(+), 16 deletions(-)
>> 
>> diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
>> index 3759167..93e7a25 100644
>> --- a/include/net/ip_vs.h
>> +++ b/include/net/ip_vs.h
>> @@ -1324,7 +1324,7 @@ static inline void ip_vs_control_del(struct ip_vs_conn 
>> *cp)
>> void ip_vs_control_net_cleanup(struct netns_ipvs *ipvs);
>> void ip_vs_estimator_net_cleanup(struct netns_ipvs *ipvs);
>> void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs);
>> -void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs);
>> +void ip_vs_service_nets_cleanup(struct list_head *net_list);
>> 
>> /* IPVS application functions
>>  * (from ip_vs_app.c)
>> diff --git a/net/netfilter/ipvs/ip_vs_core.c 
>> b/net/netfilter/ipvs/ip_vs_core.c
>> index 46f06f9..b4d79b7 100644
>> --- a/net/netfilter/ipvs/ip_vs_core.c
>> +++ b/net/netfilter/ipvs/ip_vs_core.c
>> @@ -2402,18 +2402,23 @@ static int __net_init __ip_vs_init(struct net *net)
>>  return -ENOMEM;
>> }
>> 
>> -static void __net_exit __ip_vs_cleanup(struct net *net)
>> +static void __net_exit __ip_vs_cleanup_batch(struct list_head *net_list)
>> {
>> -struct netns_ipvs *ipvs = net_ipvs(net);
>> -
>> -ip_vs_service_net_cleanup(ipvs);/* ip_vs_flush() with locks */
>> -ip_vs_conn_net_cleanup(ipvs);
>> -ip_vs_app_net_cleanup(ipvs);
>> -ip_vs_protocol_net_cleanup(ipvs);
>> -ip_vs_control_net_cleanup(ipvs);
>> -ip_vs_estimator_net_cleanup(ipvs);
>> -IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen);
>> -net->ipvs = NULL;
>> +struct netns_ipvs *ipvs;
>> +struct net *net;
>> +LIST_HEAD(list);
>> +
>> +ip_vs_service_nets_cleanup(net_list);   /* ip_vs_flush() with locks */
>> +list_for_each_entry(net, net_list, exit_list) {
> 
>   How much faster is to replace list_for_each_entry in
> ops_exit_list() with this one. IPVS can waste time in calls
> such as kthread_stop() and del_timer_sync() but I'm not sure
> we can solve it easily. What gain do you see in benchmarks?

Hi, 

As the following benchmark testing results show, there is a little performance 
improvement:

$  cat add_del_unshare.sh
#!/bin/bash

for i in `seq 1 100`
do
 (for j in `seq 1 40` ; do  unshare -n ipvsadm -A -t 172.16.$i.$j:80 
>/dev/null ; done) &
done
wait; grep net_namespace /proc/slabinfo

Before patch:
$  time sh add_del_unshare.sh
net_namespace   4020   4020   473668 : tunables000 : 
slabdata670670  0

real0m8.086s
user0m2.025s
sys 0m36.956s

After patch:
$  time sh add_del_unshare.sh
net_namespace   4020   4020   473668 : tunables000 : 
slabdata670670  0

real0m7.623s
user0m2.003s
sys 0m32.935s


> 
>> +ipvs = net_ipvs(net);
>> +ip_vs_conn_net_cleanup(ipvs);
>> +ip_vs_app_net_cleanup(ipvs);
>> +ip_vs_protocol_net_cleanup(ipvs);
>> +ip_vs_control_net_cleanup(ipvs);
>> +ip_vs_estimator_net_cleanup(ipvs);
>> +IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen);
>> +net->ipvs = NULL;
>> +}
>> }
> 
> Regards
> 
> --
> Julian Anastasov 
>

[PATCH] openvswitch: Fix a possible memory leak on dst_cache

2019-07-18 Thread Haishuang Yan

dst_cache should be destroyed when adding flow actions fails.

Fixes: d71785ffc7e7 ("net: add dst_cache to ovs vxlan lwtunnel")
Signed-off-by: Haishuang Yan 
---
 net/openvswitch/flow_netlink.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index d7559c6..1fd1cdd 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2608,6 +2608,7 @@ static int validate_and_copy_set_tun(const struct nlattr 
*attr,
 sizeof(*ovs_tun), log);
if (IS_ERR(a)) {
dst_release((struct dst_entry *)tun_dst);
+   dst_cache_destroy(&tun_dst->u.tun_info.dst_cache);
return PTR_ERR(a);
}
 
-- 
1.8.3.1

[PATCH v6 1/3] selftests: netfilter: add ipvs test script

2019-10-10 Thread Haishuang Yan

Test virtual server via direct routing for IPv4.

Tested:

# selftests: netfilter: ipvs.sh
# Testing DR mode...
# ipvs.sh: PASS
ok 6 selftests: netfilter: ipvs.sh

Signed-off-by: Haishuang Yan 
---
v6: use #!/bin/sh
v5: use cmp to compare two file contents suggested by Simon Horman
v4: use #!/bin/bash -p suggested by Duncan Roe
v3: use bash style
v2: optimize test script
---
 tools/testing/selftests/netfilter/Makefile |   2 +-
 tools/testing/selftests/netfilter/ipvs.sh  | 178 +
 2 files changed, 179 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/netfilter/ipvs.sh

diff --git a/tools/testing/selftests/netfilter/Makefile 
b/tools/testing/selftests/netfilter/Makefile
index 4144984..de1032b 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -2,6 +2,6 @@
 # Makefile for netfilter selftests
 
 TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
-   conntrack_icmp_related.sh nft_flowtable.sh
+   conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh
 
 include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/ipvs.sh 
b/tools/testing/selftests/netfilter/ipvs.sh
new file mode 100755
index 000..3d11d87
--- /dev/null
+++ b/tools/testing/selftests/netfilter/ipvs.sh
@@ -0,0 +1,178 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# End-to-end ipvs test suite
+# Topology:
+#--+
+#  |   |
+# ns0  | ns1   |
+#  --- | ------|
+#  | veth01  | - | veth10  || veth12  ||
+#  ---peer   ------|
+#   |  ||  |
+#  --- ||  |
+#  |  br0| |-  peer |--|
+#  --- ||  |
+#   |  ||  |
+#  -- peer   --  ---   |
+#  |  veth02 | - |  veth20 | | veth21  |   |
+#  --  | --  ---   |
+#  | ns2   |
+#  |   |
+#--+
+#
+# We assume that all network driver are loaded
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+GREEN='\033[0;92m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+readonly port=8080
+
+readonly vip_v4=207.175.44.110
+readonly cip_v4=10.0.0.2
+readonly gip_v4=10.0.0.1
+readonly dip_v4=172.16.0.1
+readonly rip_v4=172.16.0.2
+readonly sip_v4=10.0.0.3
+
+readonly infile="$(mktemp)"
+readonly outfile="$(mktemp)"
+readonly datalen=32
+
+sysipvsnet="/proc/sys/net/ipv4/vs/"
+if [ ! -d $sysipvsnet ]; then
+   modprobe -q ip_vs
+   if [ $? -ne 0 ]; then
+   echo "skip: could not run test without ipvs module"
+   exit $ksft_skip
+   fi
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+   echo "SKIP: Could not run test without ip tool"
+   exit $ksft_skip
+fi
+
+ipvsadm -v > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+   echo "SKIP: Could not run test without ipvsadm"
+   exit $ksft_skip
+fi
+
+setup() {
+   ip netns add ns0
+   ip netns add ns1
+   ip netns add ns2
+
+   ip link add veth01 netns ns0 type veth peer name veth10 netns ns1
+   ip link add veth02 netns ns0 type veth peer name veth20 netns ns2
+   ip link add veth12 netns ns1 type veth peer name veth21 netns ns2
+
+   ip netns exec ns0 ip link set veth01 up
+   ip netns exec ns0 ip link set veth02 up
+   ip netns exec ns0 ip link add br0 type bridge
+   ip netns exec ns0 ip link set veth01 master br0
+   ip netns exec ns0 ip link set veth02 master br0
+   ip netns exec ns0 ip link set br0 up
+   ip netns exec ns0 ip addr add ${cip_v4}/24 dev br0
+
+   ip netns exec ns1 ip link set lo up
+   ip netns exec ns1 ip link set veth10 up
+   ip netns exec ns1 ip addr add ${gip_v4}/24 dev veth10
+   ip netns exec ns1 ip link set veth12 up
+   ip netns exec ns1 ip addr add ${dip_v4}/24 dev veth12
+
+   ip netns exec ns2 ip link set lo up
+   ip netns exec ns2 ip link set veth21 up
+   ip netns exec ns2 ip addr add ${rip_v4}/24 dev veth21
+   ip netns exec ns2 ip link set veth20 up
+   ip netns exec ns2 ip addr add ${sip_v4}/24 dev veth20
+
+   sleep 1
+
+   dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none
+}
+
+cleanup() {
+

[PATCH v6 2/3] selftests: netfilter: add ipvs nat test case

2019-10-10 Thread Haishuang Yan

Test virtual server via NAT.

Tested:
# selftests: netfilter: ipvs.sh
# Testing DR mode...
# Testing NAT mode...
# ipvs.sh: PASS

Signed-off-by: Haishuang Yan 
---
 tools/testing/selftests/netfilter/ipvs.sh | 22 +-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/netfilter/ipvs.sh 
b/tools/testing/selftests/netfilter/ipvs.sh
index 3d11d87..8b2e618 100755
--- a/tools/testing/selftests/netfilter/ipvs.sh
+++ b/tools/testing/selftests/netfilter/ipvs.sh
@@ -154,20 +154,40 @@ test_dr() {
test_service
 }
 
+test_nat() {
+   ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0
+
+   ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1
+   ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr
+   ip netns exec ns1 ipvsadm -a -m -t ${vip_v4}:${port} -r 
${rip_v4}:${port}
+   ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1
+
+   ip netns exec ns2 ip link del veth20
+   ip netns exec ns2 ip route add default via ${dip_v4} dev veth21
+
+   test_service
+}
+
 run_tests() {
local errors=
 
echo "Testing DR mode..."
+   cleanup
setup
test_dr
errors=$(( $errors + $? ))
 
+   echo "Testing NAT mode..."
+   cleanup
+   setup
+   test_nat
+   errors=$(( $errors + $? ))
+
return $errors
 }
 
 trap cleanup EXIT
 
-cleanup
 run_tests
 
 if [ $? -ne 0 ]; then
-- 
1.8.3.1

[PATCH v6 3/3] selftests: netfilter: add ipvs tunnel test case

2019-10-10 Thread Haishuang Yan

Test virtual server via ipip tunnel.

Tested:
# selftests: netfilter: ipvs.sh
# Testing DR mode...
# Testing NAT mode...
# Testing Tunnel mode...
# ipvs.sh: PASS
ok 6 selftests: netfilter: ipvs.sh

Signed-off-by: Haishuang Yan 
---
v2: optimize test script
---
 tools/testing/selftests/netfilter/ipvs.sh | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/tools/testing/selftests/netfilter/ipvs.sh 
b/tools/testing/selftests/netfilter/ipvs.sh
index 8b2e618..c3b8f90 100755
--- a/tools/testing/selftests/netfilter/ipvs.sh
+++ b/tools/testing/selftests/netfilter/ipvs.sh
@@ -168,6 +168,30 @@ test_nat() {
test_service
 }
 
+test_tun() {
+   ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0
+
+   ip netns exec ns1 modprobe ipip
+   ip netns exec ns1 ip link set tunl0 up
+   ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=0
+   ip netns exec ns1 sysctl -qw net.ipv4.conf.all.send_redirects=0
+   ip netns exec ns1 sysctl -qw net.ipv4.conf.default.send_redirects=0
+   ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr
+   ip netns exec ns1 ipvsadm -a -i -t ${vip_v4}:${port} -r 
${rip_v4}:${port}
+   ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1
+
+   ip netns exec ns2 modprobe ipip
+   ip netns exec ns2 ip link set tunl0 up
+   ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_ignore=1
+   ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_announce=2
+   ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0
+   ip netns exec ns2 sysctl -qw net.ipv4.conf.tunl0.rp_filter=0
+   ip netns exec ns2 sysctl -qw net.ipv4.conf.veth21.rp_filter=0
+   ip netns exec ns2 ip addr add ${vip_v4}/32 dev lo:1
+
+   test_service
+}
+
 run_tests() {
local errors=
 
@@ -183,6 +207,12 @@ run_tests() {
test_nat
errors=$(( $errors + $? ))
 
+   echo "Testing Tunnel mode..."
+   cleanup
+   setup
+   test_tun
+   errors=$(( $errors + $? ))
+
return $errors
 }
 
-- 
1.8.3.1

[PATCH v6 0/3] selftests: netfilter: introduce test cases for ipvs

2019-10-10 Thread Haishuang Yan

This patch series includes test cases for ipvs.

The test topology is shown below:
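+-----------------------------------------------------------+
|                      |                                    |
|         ns0          |         ns1                        |
|      -----------     |     -----------    -----------     |
|      | veth01  | --------- | veth10  |    | veth12  |     |
|      -----------    peer   -----------    -----------     |
|           |          |                         |          |
|      -----------     |                         |          |
|      |  br0    |     |------------------  peer |----------|
|      -----------     |                         |          |
|           |          |                         |          |
|      -----------    peer   -----------    -----------     |
|      | veth02  | --------- | veth20  |    | veth21  |     |
|      -----------     |     -----------    -----------     |
|                      |         ns2                        |
|                      |                                    |
+-----------------------------------------------------------+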

Test results:
# selftests: netfilter: ipvs.sh
# Testing DR mode...
# Testing NAT mode...
# Testing Tunnel mode...
# ipvs.sh: PASS
ok 6 selftests: netfilter: ipvs.sh

Signed-off-by: Haishuang Yan 

Haishuang Yan (3):
  selftests: netfilter: add ipvs test script
  selftests: netfilter: add ipvs nat test case
  selftests: netfilter: add ipvs tunnel test case

 tools/testing/selftests/netfilter/Makefile |   2 +-
 tools/testing/selftests/netfilter/ipvs.sh  | 228 +
 2 files changed, 229 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/netfilter/ipvs.sh

--
1.8.3.1

[net-next 1/2] ipvs: batch __ip_vs_cleanup

2019-07-13 Thread Haishuang Yan

It's better to batch __ip_vs_cleanup to speedup ipvs
connections dismantle.

Signed-off-by: Haishuang Yan 
---
 include/net/ip_vs.h |  2 +-
 net/netfilter/ipvs/ip_vs_core.c | 29 +
 net/netfilter/ipvs/ip_vs_ctl.c  | 13 ++---
 3 files changed, 28 insertions(+), 16 deletions(-)

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 3759167..93e7a25 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1324,7 +1324,7 @@ static inline void ip_vs_control_del(struct ip_vs_conn 
*cp)
 void ip_vs_control_net_cleanup(struct netns_ipvs *ipvs);
 void ip_vs_estimator_net_cleanup(struct netns_ipvs *ipvs);
 void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs);
-void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs);
+void ip_vs_service_nets_cleanup(struct list_head *net_list);
 
 /* IPVS application functions
  * (from ip_vs_app.c)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 46f06f9..b4d79b7 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -2402,18 +2402,23 @@ static int __net_init __ip_vs_init(struct net *net)
return -ENOMEM;
 }
 
-static void __net_exit __ip_vs_cleanup(struct net *net)
+static void __net_exit __ip_vs_cleanup_batch(struct list_head *net_list)
 {
-   struct netns_ipvs *ipvs = net_ipvs(net);
-
-   ip_vs_service_net_cleanup(ipvs);/* ip_vs_flush() with locks */
-   ip_vs_conn_net_cleanup(ipvs);
-   ip_vs_app_net_cleanup(ipvs);
-   ip_vs_protocol_net_cleanup(ipvs);
-   ip_vs_control_net_cleanup(ipvs);
-   ip_vs_estimator_net_cleanup(ipvs);
-   IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen);
-   net->ipvs = NULL;
+   struct netns_ipvs *ipvs;
+   struct net *net;
+   LIST_HEAD(list);
+
+   ip_vs_service_nets_cleanup(net_list);   /* ip_vs_flush() with locks */
+   list_for_each_entry(net, net_list, exit_list) {
+   ipvs = net_ipvs(net);
+   ip_vs_conn_net_cleanup(ipvs);
+   ip_vs_app_net_cleanup(ipvs);
+   ip_vs_protocol_net_cleanup(ipvs);
+   ip_vs_control_net_cleanup(ipvs);
+   ip_vs_estimator_net_cleanup(ipvs);
+   IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen);
+   net->ipvs = NULL;
+   }
 }
 
 static int __net_init __ip_vs_dev_init(struct net *net)
@@ -2442,7 +2447,7 @@ static void __net_exit __ip_vs_dev_cleanup(struct net 
*net)
 
 static struct pernet_operations ipvs_core_ops = {
.init = __ip_vs_init,
-   .exit = __ip_vs_cleanup,
+   .exit_batch = __ip_vs_cleanup_batch,
.id   = &ip_vs_net_id,
.size = sizeof(struct netns_ipvs),
 };
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 07e0967..c8e652b 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1607,14 +1607,21 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool 
cleanup)
 
 /*
  * Delete service by {netns} in the service table.
- * Called by __ip_vs_cleanup()
+ * Called by __ip_vs_batch_cleanup()
  */
-void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs)
+void ip_vs_service_nets_cleanup(struct list_head *net_list)
 {
+   struct netns_ipvs *ipvs;
+   struct net *net;
+   LIST_HEAD(list);
+
EnterFunction(2);
/* Check for "full" addressed entries */
mutex_lock(&__ip_vs_mutex);
-   ip_vs_flush(ipvs, true);
+   list_for_each_entry(net, net_list, exit_list) {
+   ipvs = net_ipvs(net);
+   ip_vs_flush(ipvs, true);
+   }
mutex_unlock(&__ip_vs_mutex);
LeaveFunction(2);
 }
-- 
1.8.3.1

[net-next 2/2] ipvs: batch __ip_vs_dev_cleanup

2019-07-13 Thread Haishuang Yan

It's better to batch __ip_vs_dev_cleanup to speed up ipvs
device dismantle.

Signed-off-by: Haishuang Yan 
---
 net/netfilter/ipvs/ip_vs_core.c | 20 +---
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index b4d79b7..58af24a 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -2434,14 +2434,20 @@ static int __net_init __ip_vs_dev_init(struct net *net)
return ret;
 }
 
-static void __net_exit __ip_vs_dev_cleanup(struct net *net)
+static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list)
 {
-   struct netns_ipvs *ipvs = net_ipvs(net);
+   struct netns_ipvs *ipvs;
+   struct net *net;
+   LIST_HEAD(list);
+
EnterFunction(2);
-   nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
-   ipvs->enable = 0;   /* Disable packet reception */
-   smp_wmb();
-   ip_vs_sync_net_cleanup(ipvs);
+   list_for_each_entry(net, net_list, exit_list) {
+   ipvs = net_ipvs(net);
+   nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+   ipvs->enable = 0;   /* Disable packet reception */
+   smp_wmb();
+   ip_vs_sync_net_cleanup(ipvs);
+   }
LeaveFunction(2);
 }
 
@@ -2454,7 +2460,7 @@ static void __net_exit __ip_vs_dev_cleanup(struct net 
*net)
 
 static struct pernet_operations ipvs_core_dev_ops = {
.init = __ip_vs_dev_init,
-   .exit = __ip_vs_dev_cleanup,
+   .exit_batch = __ip_vs_dev_cleanup_batch,
 };
 
 /*
-- 
1.8.3.1

[net-next 0/2] ipvs: speedup ipvs netns dismantle

2019-07-13 Thread Haishuang Yan

Implement exit_batch() method to dismantle more ipvs netns
per round.

Haishuang Yan (2):
  ipvs: batch __ip_vs_cleanup
  ipvs: batch __ip_vs_dev_cleanup

 include/net/ip_vs.h |  2 +-
 net/netfilter/ipvs/ip_vs_core.c | 49 +
 net/netfilter/ipvs/ip_vs_ctl.c  | 13 ---
 3 files changed, 41 insertions(+), 23 deletions(-)

-- 
1.8.3.1

[PATCH] sit: use dst_cache in ipip6_tunnel_xmit

2019-07-14 Thread Haishuang Yan

Same as other ip tunnel, use dst_cache in xmit action to avoid
unnecessary fib lookups.

Signed-off-by: Haishuang Yan 
---
 net/ipv6/sit.c | 13 +
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 8061089..b2ccbc4 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -900,12 +900,17 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
   RT_TOS(tos), RT_SCOPE_UNIVERSE, IPPROTO_IPV6,
   0, dst, tiph->saddr, 0, 0,
   sock_net_uid(tunnel->net, NULL));
-   rt = ip_route_output_flow(tunnel->net, &fl4, NULL);
 
-   if (IS_ERR(rt)) {
-   dev->stats.tx_carrier_errors++;
-   goto tx_error_icmp;
+   rt = dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr);
+   if (!rt) {
+   rt = ip_route_output_flow(tunnel->net, &fl4, NULL);
+   if (IS_ERR(rt)) {
+   dev->stats.tx_carrier_errors++;
+   goto tx_error_icmp;
+   }
+   dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, fl4.saddr);
}
+
if (rt->rt_type != RTN_UNICAST) {
ip_rt_put(rt);
dev->stats.tx_carrier_errors++;
-- 
1.8.3.1

[PATCH] ipip: validate header length in ipip_tunnel_xmit

2019-07-24 Thread Haishuang Yan

We need the same checks introduced by commit cb9f1b783850
("ip: validate header length on virtual device xmit") for
ipip tunnel.

Signed-off-by: Haishuang Yan 
---
 net/ipv4/ipip.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 43adfc1..2f01cf6 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -275,6 +275,9 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb,
const struct iphdr  *tiph = &tunnel->parms.iph;
u8 ipproto;
 
+   if (!pskb_inet_may_pull(skb))
+   goto tx_error;
+
switch (skb->protocol) {
case htons(ETH_P_IP):
ipproto = IPPROTO_IPIP;
-- 
1.8.3.1

[PATCH] ipip: validate header length in ipip_tunnel_xmit

2019-07-24 Thread Haishuang Yan

We need the same checks introduced by commit cb9f1b783850
("ip: validate header length on virtual device xmit") for
ipip tunnel.

Signed-off-by: Haishuang Yan 
---
 net/ipv4/ipip.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 43adfc1..2f01cf6 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -275,6 +275,9 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb,
const struct iphdr  *tiph = &tunnel->parms.iph;
u8 ipproto;
 
+   if (!pskb_inet_may_pull(skb))
+   goto tx_error;
+
switch (skb->protocol) {
case htons(ETH_P_IP):
ipproto = IPPROTO_IPIP;
-- 
1.8.3.1

[PATCH] ip6_tunnel: fix possible use-after-free on xmit

2019-07-25 Thread Haishuang Yan

ip4ip6/ip6ip6 tunnels run iptunnel_handle_offloads on xmit which
can cause a possible use-after-free accessing iph/ipv6h pointer
since the packet will be 'uncloned' running pskb_expand_head if
it is a cloned gso skb.

Fixes: 0e9a709560db ("ip6_tunnel, ip6_gre: fix setting of DSCP on encapsulated 
packets")
Signed-off-by: Haishuang Yan 
---
 net/ipv6/ip6_tunnel.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 3134fbb..754a484 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1278,12 +1278,11 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device 
*dev, __u8 dsfield,
}
 
fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+   dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph));
 
if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
return -1;
 
-   dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph));
-
skb_set_inner_ipproto(skb, IPPROTO_IPIP);
 
err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
@@ -1367,12 +1366,11 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device 
*dev, __u8 dsfield,
}
 
fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+   dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h));
 
if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
return -1;
 
-   dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h));
-
skb_set_inner_ipproto(skb, IPPROTO_IPV6);
 
err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
-- 
1.8.3.1

[PATCH] ip6_gre: reload ipv6h in prepare_ip6gre_xmit_ipv6

2019-07-24 Thread Haishuang Yan

Since ip6_tnl_parse_tlv_enc_lim() can call pskb_may_pull(),
which may change skb->data, we need to reload ipv6h at
the right place.

Fixes: 898b29798e36 ("ip6_gre: Refactor ip6gre xmit codes")
Cc: William Tu 
Signed-off-by: Haishuang Yan 
---
 net/ipv6/ip6_gre.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index c2049c7..dd2d0b96 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -660,12 +660,13 @@ static int prepare_ip6gre_xmit_ipv6(struct sk_buff *skb,
struct flowi6 *fl6, __u8 *dsfield,
int *encap_limit)
 {
-   struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+   struct ipv6hdr *ipv6h;
struct ip6_tnl *t = netdev_priv(dev);
__u16 offset;
 
offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
/* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
+   ipv6h = ipv6_hdr(skb);
 
if (offset > 0) {
struct ipv6_tlv_tnl_enc_lim *tel;
-- 
1.8.3.1

Re: [PATCH] openvswitch: Fix a possible memory leak on dst_cache

2019-07-18 Thread Haishuang Yan



> On 2019年7月19日, at 上午6:12, Gregory Rose  wrote:
> 
> On 7/18/2019 9:07 AM, Haishuang Yan wrote:
>> dst_cache should be destroyed when fail to add flow actions.
>> 
>> Fixes: d71785ffc7e7 ("net: add dst_cache to ovs vxlan lwtunnel")
>> Signed-off-by: Haishuang Yan 
>> ---
>>  net/openvswitch/flow_netlink.c | 1 +
>>  1 file changed, 1 insertion(+)
>> 
>> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
>> index d7559c6..1fd1cdd 100644
>> --- a/net/openvswitch/flow_netlink.c
>> +++ b/net/openvswitch/flow_netlink.c
>> @@ -2608,6 +2608,7 @@ static int validate_and_copy_set_tun(const struct 
>> nlattr *attr,
>>   sizeof(*ovs_tun), log);
>>  if (IS_ERR(a)) {
>>  dst_release((struct dst_entry *)tun_dst);
>> +dst_cache_destroy(&tun_dst->u.tun_info.dst_cache);
>>  return PTR_ERR(a);
>>  }
>>  
> 
> Nack.
> 
> dst_release will decrement the ref count and will call_rcu(&dst->rcu_head, 
> dst_destroy_rcu) if the ref count is zero.  No other net drivers call 
> dst_destroy SFAICT.
> 
> Haishuang,
> 
> are you trying to fix some specific problem here?
> 
> Thanks,
> 
> - Greg
> 
> 

Greg,

You’re right, dst_cache would be freed in metadata_dst_free:

  125
  126 if (dst->flags & DST_METADATA)
  127 metadata_dst_free((struct metadata_dst *)dst);
  128 else
  129 kmem_cache_free(dst->ops->kmem_cachep, dst);
  130

I thought I had encountered a memory leak, but it seems it is not an issue; thanks for
your explanation.

Re: [PATCH] ip6_gre: simplify gre header parsing in ip6gre_err

2018-09-10 Thread Haishuang Yan




> On 2018年9月10日, at 下午11:36, Jiri Benc  wrote:
> 
> On Mon, 10 Sep 2018 16:25:09 +0800, Haishuang Yan wrote:
>> +if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6),
>> + offset) < 0) {
>> +if (!csum_err)  /* ignore csum errors. */
>> +return;
>>  }
> 
> gre_parse_header stops parsing when csum_err is encountered. Which
> means tpi.key is undefined...
> 
>> 
>> -if (!pskb_may_pull(skb, offset + grehlen))
>> -return;
>>  ipv6h = (const struct ipv6hdr *)skb->data;
>> -greh = (const struct gre_base_hdr *)(skb->data + offset);
>> -key = key_off ? *(__be32 *)(skb->data + key_off) : 0;
>> -
>>  t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr,
>> - key, greh->protocol);
>> + tpi.key, tpi.proto);
> 
> ...and can't be used here.
> 
> Jiri
> 

You are right. Thanks for reviewing. So the same problem also arises in
the ipgre_err code:

   187 iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
   188 t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
   189  iph->daddr, iph->saddr, tpi->key);

Since csum_err may not be used outside, how about refactoring gre_parse_header 
function like this:

--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -86,7 +86,7 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info 
*tpi,

options = (__be32 *)(greh + 1);
if (greh->flags & GRE_CSUM) {
-   if (skb_checksum_simple_validate(skb)) {
+   if (csum_err && skb_checksum_simple_validate(skb)) {
*csum_err = true;
return -EINVAL;
}

And in gre_err function, we can call gre_parse_header(skb, &tpi, NULL, **) like 
this:

--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -234,11 +234,9 @@ static void gre_err(struct sk_buff *skb, u32 info)
struct tnl_ptk_info tpi;
bool csum_err = false;

-   if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP),
-iph->ihl * 4) < 0) {
-   if (!csum_err)  /* ignore csum errors. */
+   if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
+iph->ihl * 4) < 0)
return;
-   }

[PATCH v2] geneve: fix max_mtu setting

2016-07-02 Thread Haishuang Yan

For ipv6+udp+geneve encapsulation data, the max_mtu should subtract
sizeof(ipv6hdr), instead of sizeof(iphdr).

Signed-off-by: Haishuang Yan 
---
Changes in v2:
   - As suggested by Jesse Gross, treat AF_UNSPEC the same as AF_INET to
avoid disallowing potentially valid configurations.
---
 drivers/net/geneve.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 310e0b9c..5de892f 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1036,12 +1036,17 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, 
struct net_device *dev)
 
 static int __geneve_change_mtu(struct net_device *dev, int new_mtu, bool 
strict)
 {
+   struct geneve_dev *geneve = netdev_priv(dev);
/* The max_mtu calculation does not take account of GENEVE
 * options, to avoid excluding potentially valid
 * configurations.
 */
-   int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - sizeof(struct iphdr)
-   - dev->hard_header_len;
+   int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len;
+
+   if (geneve->remote.sa.sa_family == AF_INET6)
+   max_mtu -= sizeof(struct ipv6hdr);
+   else
+   max_mtu -= sizeof(struct iphdr);
 
if (new_mtu < 68)
return -EINVAL;
-- 
1.8.3.1

[PATCH] sched, cgroup: enclose root_task_group with macro CONFIG_CGROUP_SCHED.

2016-10-30 Thread Haishuang Yan

root_task_group defined in sched/core.c is enclosed by
CONFIG_CGROUP_SCHED, so the export declaration should
also be enclosed.

Signed-off-by: Haishuang Yan 
---
 include/linux/init_task.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 325f649..f3f73fa 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -132,9 +132,8 @@
 
 extern struct cred init_cred;
 
-extern struct task_group root_task_group;
-
 #ifdef CONFIG_CGROUP_SCHED
+extern struct task_group root_task_group;
 # define INIT_CGROUP_SCHED(tsk)
\
.sched_task_group = &root_task_group,
 #else
-- 
1.8.3.1

[PATCH] geneve: fix ip_hdr_len reserved for geneve6 tunnel.

2016-11-27 Thread Haishuang Yan

It should reserve sizeof(struct ipv6hdr) for geneve in an IPv6 tunnel.

Fixes: c3ef5aa5e5 ('geneve: Merge ipv4 and ipv6 geneve_build_skb()')

Signed-off-by: Haishuang Yan 
---
 drivers/net/geneve.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 7b80e28..45301cb 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -852,7 +852,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct 
net_device *dev,
   ip_hdr(skb), skb);
ttl = key->ttl ? : ip6_dst_hoplimit(dst);
}
-   err = geneve_build_skb(dst, skb, info, xnet, sizeof(struct iphdr));
+   err = geneve_build_skb(dst, skb, info, xnet, sizeof(struct ipv6hdr));
if (unlikely(err))
return err;
 
-- 
1.8.3.1

[PATCH] vxlan: fix a potential issue when create a new vxlan fdb entry.

2016-11-27 Thread Haishuang Yan

vxlan_fdb_append may return an error, so add the proper check;
otherwise it will cause a memory leak.

Signed-off-by: Haishuang Yan 
---
 drivers/net/vxlan.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 21e92be..3b7b237 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -611,6 +611,7 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
struct vxlan_rdst *rd = NULL;
struct vxlan_fdb *f;
int notify = 0;
+   int rc = 0;
 
f = __vxlan_find_mac(vxlan, mac);
if (f) {
@@ -641,8 +642,7 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
if ((flags & NLM_F_APPEND) &&
(is_multicast_ether_addr(f->eth_addr) ||
 is_zero_ether_addr(f->eth_addr))) {
-   int rc = vxlan_fdb_append(f, ip, port, vni, ifindex,
- &rd);
+   rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
 
if (rc < 0)
return rc;
@@ -673,7 +673,11 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
INIT_LIST_HEAD(&f->remotes);
memcpy(f->eth_addr, mac, ETH_ALEN);
 
-   vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
+   rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
+   if (rc < 0) {
+   kfree(f);
+   return rc;
+   }
 
++vxlan->addrcnt;
hlist_add_head_rcu(&f->hlist,
-- 
1.8.3.1

[PATCH] ipv4: Namespaceify tcp_tw_reuse knob

2016-12-24 Thread Haishuang Yan

Signed-off-by: Haishuang Yan 
---
 include/net/netns/ipv4.h   |  1 +
 include/net/tcp.h  |  1 -
 net/ipv4/sysctl_net_ipv4.c | 14 +++---
 net/ipv4/tcp_ipv4.c|  4 ++--
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index f0cf5a1..0378e88 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -110,6 +110,7 @@ struct netns_ipv4 {
int sysctl_tcp_orphan_retries;
int sysctl_tcp_fin_timeout;
unsigned int sysctl_tcp_notsent_lowat;
+   int sysctl_tcp_tw_reuse;
 
int sysctl_igmp_max_memberships;
int sysctl_igmp_max_msf;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 207147b..6061963 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -252,7 +252,6 @@
 extern int sysctl_tcp_rmem[3];
 extern int sysctl_tcp_app_win;
 extern int sysctl_tcp_adv_win_scale;
-extern int sysctl_tcp_tw_reuse;
 extern int sysctl_tcp_frto;
 extern int sysctl_tcp_low_latency;
 extern int sysctl_tcp_nometrics_save;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 80bc36b..22cbd61 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -433,13 +433,6 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, 
int write,
.extra2 = &tcp_adv_win_scale_max,
},
{
-   .procname   = "tcp_tw_reuse",
-   .data   = &sysctl_tcp_tw_reuse,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec
-   },
-   {
.procname   = "tcp_frto",
.data   = &sysctl_tcp_frto,
.maxlen = sizeof(int),
@@ -960,6 +953,13 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, 
int write,
.mode   = 0644,
.proc_handler   = proc_dointvec,
},
+   {
+   .procname   = "tcp_tw_reuse",
+   .data   = &init_net.ipv4.sysctl_tcp_tw_reuse,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec
+   },
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
{
.procname   = "fib_multipath_use_neigh",
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 30d81f5..fe9da4f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -84,7 +84,6 @@
 #include 
 #include 
 
-int sysctl_tcp_tw_reuse __read_mostly;
 int sysctl_tcp_low_latency __read_mostly;
 
 #ifdef CONFIG_TCP_MD5SIG
@@ -120,7 +119,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, 
void *twp)
   and use initial timestamp retrieved from peer table.
 */
if (tcptw->tw_ts_recent_stamp &&
-   (!twp || (sysctl_tcp_tw_reuse &&
+   (!twp || (sock_net(sk)->ipv4.sysctl_tcp_tw_reuse &&
 get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
if (tp->write_seq == 0)
@@ -2456,6 +2455,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_orphan_retries = 0;
net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
+   net->ipv4.sysctl_tcp_tw_reuse = 0;
 
return 0;
 fail:
-- 
1.8.3.1

[PATCH v2] ipv4: Namespaceify tcp_tw_reuse knob

2016-12-24 Thread Haishuang Yan

Different namespaces might have different requirements to reuse
TIME-WAIT sockets for new connections. This might be required in
cases where different namespace applications are in place which
require TIME_WAIT socket connections to be reduced independently
of the host.

Signed-off-by: Haishuang Yan 

---
Changes in v2:
  - Make the commit message clearer.
---
 include/net/netns/ipv4.h   |  1 +
 include/net/tcp.h  |  1 -
 net/ipv4/sysctl_net_ipv4.c | 14 +++---
 net/ipv4/tcp_ipv4.c|  4 ++--
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index f0cf5a1..0378e88 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -110,6 +110,7 @@ struct netns_ipv4 {
int sysctl_tcp_orphan_retries;
int sysctl_tcp_fin_timeout;
unsigned int sysctl_tcp_notsent_lowat;
+   int sysctl_tcp_tw_reuse;
 
int sysctl_igmp_max_memberships;
int sysctl_igmp_max_msf;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 207147b..6061963 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -252,7 +252,6 @@
 extern int sysctl_tcp_rmem[3];
 extern int sysctl_tcp_app_win;
 extern int sysctl_tcp_adv_win_scale;
-extern int sysctl_tcp_tw_reuse;
 extern int sysctl_tcp_frto;
 extern int sysctl_tcp_low_latency;
 extern int sysctl_tcp_nometrics_save;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 80bc36b..22cbd61 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -433,13 +433,6 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, 
int write,
.extra2 = &tcp_adv_win_scale_max,
},
{
-   .procname   = "tcp_tw_reuse",
-   .data   = &sysctl_tcp_tw_reuse,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec
-   },
-   {
.procname   = "tcp_frto",
.data   = &sysctl_tcp_frto,
.maxlen = sizeof(int),
@@ -960,6 +953,13 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, 
int write,
.mode   = 0644,
.proc_handler   = proc_dointvec,
},
+   {
+   .procname   = "tcp_tw_reuse",
+   .data   = &init_net.ipv4.sysctl_tcp_tw_reuse,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec
+   },
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
{
.procname   = "fib_multipath_use_neigh",
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 30d81f5..fe9da4f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -84,7 +84,6 @@
 #include 
 #include 
 
-int sysctl_tcp_tw_reuse __read_mostly;
 int sysctl_tcp_low_latency __read_mostly;
 
 #ifdef CONFIG_TCP_MD5SIG
@@ -120,7 +119,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, 
void *twp)
   and use initial timestamp retrieved from peer table.
 */
if (tcptw->tw_ts_recent_stamp &&
-   (!twp || (sysctl_tcp_tw_reuse &&
+   (!twp || (sock_net(sk)->ipv4.sysctl_tcp_tw_reuse &&
 get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
if (tp->write_seq == 0)
@@ -2456,6 +2455,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_orphan_retries = 0;
net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
+   net->ipv4.sysctl_tcp_tw_reuse = 0;
 
return 0;
 fail:
-- 
1.8.3.1

[PATCH 2/2] ipv4: Namespaceify tcp_max_syn_backlog knob

2016-12-28 Thread Haishuang Yan

Applications in different namespaces might require a different maximum
number of remembered connection requests.

Signed-off-by: Haishuang Yan 
---
 include/net/netns/ipv4.h   |  1 +
 include/net/request_sock.h |  4 +---
 net/core/request_sock.c|  2 --
 net/ipv4/sysctl_net_ipv4.c | 14 +++---
 net/ipv4/tcp.c |  2 --
 net/ipv4/tcp_input.c   |  4 ++--
 net/ipv4/tcp_ipv4.c|  7 +--
 7 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 99becaf..96b15a2 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -122,6 +122,7 @@ struct netns_ipv4 {
unsigned int sysctl_tcp_notsent_lowat;
int sysctl_tcp_tw_reuse;
struct inet_timewait_death_row tcp_death_row; 
+   int sysctl_max_syn_backlog;
 
int sysctl_igmp_max_memberships;
int sysctl_igmp_max_msf;
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 6ebe13e..a12a5d2 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -1,7 +1,7 @@
 /*
  * NET Generic infrastructure for Network protocols.
  *
- * Definitions for request_sock 
+ * Definitions for request_sock
  *
  * Authors:Arnaldo Carvalho de Melo 
  *
@@ -123,8 +123,6 @@ static inline void reqsk_put(struct request_sock *req)
reqsk_free(req);
 }
 
-extern int sysctl_max_syn_backlog;
-
 /*
  * For a TCP Fast Open listener -
  * lock - protects the access to all the reqsk, which is co-owned by
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 5d26056..9b8727c 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -34,8 +34,6 @@
  * and it will increase in proportion to the memory of machine.
  * Note : Dont forget somaxconn that may limit backlog too.
  */
-int sysctl_max_syn_backlog = 256;
-EXPORT_SYMBOL(sysctl_max_syn_backlog);
 
 void reqsk_queue_alloc(struct request_sock_queue *queue)
 {
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 66f8f1b..134d8e1 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -324,13 +324,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler   = proc_dointvec
},
{
-   .procname   = "tcp_max_syn_backlog",
-   .data   = &sysctl_max_syn_backlog,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec
-   },
-   {
.procname   = "inet_peer_threshold",
.data   = &inet_peer_threshold,
.maxlen = sizeof(int),
@@ -960,6 +953,13 @@ static struct ctl_table ipv4_net_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec
},
+   {
+   .procname   = "tcp_max_syn_backlog",
+   .data   = &init_net.ipv4.sysctl_max_syn_backlog,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec
+   },
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
{
.procname   = "fib_multipath_use_neigh",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 91938c9..f0637a9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3378,9 +3378,7 @@ void __init tcp_init(void)
 
 
cnt = tcp_hashinfo.ehash_mask + 1;
-
sysctl_tcp_max_orphans = cnt / 2;
-   sysctl_max_syn_backlog = max(128, cnt / 256);
 
tcp_init_mem();
/* Set per-socket limits to no more than 1/128 the pressure threshold */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c614802..ec6d843 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6377,8 +6377,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
}
/* Kill the following clause, if you dislike this way. */
else if (!net->ipv4.sysctl_tcp_syncookies &&
-(sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) 
<
- (sysctl_max_syn_backlog >> 2)) &&
+(net->ipv4.sysctl_max_syn_backlog - 
inet_csk_reqsk_queue_len(sk) <
+ (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
 !tcp_peer_is_proven(req, dst, false,
 tmp_opt.saw_tstamp)) {
/* Without syncookies last quarter of
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 56b5f49..7e4be4f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2419,7 +2419,7 @@ static void __net_exit tcp_sk_exit(struct net *net)
 
 static int __net_init tcp_sk_init(struct net *net)
 {
-   int res, cpu;
+   int res, cpu, cn

[PATCH 1/2] ipv4: Namespaceify tcp_tw_recycle and tcp_max_tw_buckets knob

2016-12-28 Thread Haishuang Yan

Applications in different namespaces might require fast recycling of
TIME-WAIT sockets independently of the host.

Signed-off-by: Haishuang Yan 
---
 include/net/inet_timewait_sock.h | 13 +
 include/net/netns/ipv4.h | 11 +++
 include/net/tcp.h|  1 -
 net/ipv4/af_inet.c   |  2 --
 net/ipv4/inet_timewait_sock.c|  3 +--
 net/ipv4/proc.c  |  2 +-
 net/ipv4/sysctl_net_ipv4.c   | 28 ++--
 net/ipv4/tcp.c   |  3 ++-
 net/ipv4/tcp_input.c |  2 +-
 net/ipv4/tcp_ipv4.c  | 12 
 net/ipv4/tcp_minisocks.c | 14 +-
 net/ipv6/tcp_ipv6.c  |  7 ---
 12 files changed, 48 insertions(+), 50 deletions(-)

diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index c9b3eb7..6a75d67 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -29,16 +29,6 @@
 
 #include 
 
-struct inet_hashinfo;
-
-struct inet_timewait_death_row {
-   atomic_ttw_count;
-
-   struct inet_hashinfo*hashinfo cacheline_aligned_in_smp;
-   int sysctl_tw_recycle;
-   int sysctl_max_tw_buckets;
-};
-
 struct inet_bind_bucket;
 
 /*
@@ -125,8 +115,7 @@ static inline void inet_twsk_reschedule(struct 
inet_timewait_sock *tw, int timeo
 
 void inet_twsk_deschedule_put(struct inet_timewait_sock *tw);
 
-void inet_twsk_purge(struct inet_hashinfo *hashinfo,
-struct inet_timewait_death_row *twdr, int family);
+void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family);
 
 static inline
 struct net *twsk_net(const struct inet_timewait_sock *twsk)
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 0378e88..99becaf 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -27,6 +27,16 @@ struct ping_group_range {
kgid_t  range[2];
 };
 
+struct inet_hashinfo;
+
+struct inet_timewait_death_row {
+   atomic_ttw_count;
+
+   struct inet_hashinfo*hashinfo cacheline_aligned_in_smp;
+   int sysctl_tw_recycle;
+   int sysctl_max_tw_buckets;
+};
+
 struct netns_ipv4 {
 #ifdef CONFIG_SYSCTL
struct ctl_table_header *forw_hdr;
@@ -111,6 +121,7 @@ struct netns_ipv4 {
int sysctl_tcp_fin_timeout;
unsigned int sysctl_tcp_notsent_lowat;
int sysctl_tcp_tw_reuse;
+   struct inet_timewait_death_row tcp_death_row; 
 
int sysctl_igmp_max_memberships;
int sysctl_igmp_max_msf;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6061963..1da0aa7 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -231,7 +231,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
  */
 #defineTFO_SERVER_WO_SOCKOPT1  0x400
 
-extern struct inet_timewait_death_row tcp_death_row;
 
 /* sysctl variables for tcp */
 extern int sysctl_tcp_timestamps;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1830e6f..29b1dd9 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1831,8 +1831,6 @@ static int __init inet_init(void)
 
ip_init();
 
-   tcp_v4_init();
-
/* Setup TCP slab cache for open requests. */
tcp_init();
 
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index ddcd56c..f8aff2c 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -257,8 +257,7 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, 
int timeo, bool rearm)
 }
 EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
 
-void inet_twsk_purge(struct inet_hashinfo *hashinfo,
-struct inet_timewait_death_row *twdr, int family)
+void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family)
 {
struct inet_timewait_sock *tw;
struct sock *sk;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 7143ca1..0247ca0 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -65,7 +65,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
socket_seq_show(seq);
seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
   sock_prot_inuse_get(net, &tcp_prot), orphans,
-  atomic_read(&tcp_death_row.tw_count), sockets,
+  atomic_read(&net->ipv4.tcp_death_row.tw_count), sockets,
   proto_memory_allocated(&tcp_prot));
seq_printf(seq, "UDP: inuse %d mem %ld\n",
   sock_prot_inuse_get(net, &udp_prot),
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 22cbd61..66f8f1b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -290,13 +290,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler   = proc_dointvec
},
{
-   .procname   =

[PATCH v2] vxlan: fix a potential issue when create a new vxlan fdb entry.

2016-11-28 Thread Haishuang Yan

vxlan_fdb_append may return an error, so add the proper check;
otherwise it will cause a memory leak.

Signed-off-by: Haishuang Yan 

Changes in v2:
  - Unnecessary to initialize rc to zero.
---
 drivers/net/vxlan.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 21e92be..bb70dd5 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -611,6 +611,7 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
struct vxlan_rdst *rd = NULL;
struct vxlan_fdb *f;
int notify = 0;
+   int rc;
 
f = __vxlan_find_mac(vxlan, mac);
if (f) {
@@ -641,8 +642,7 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
if ((flags & NLM_F_APPEND) &&
(is_multicast_ether_addr(f->eth_addr) ||
 is_zero_ether_addr(f->eth_addr))) {
-   int rc = vxlan_fdb_append(f, ip, port, vni, ifindex,
- &rd);
+   rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
 
if (rc < 0)
return rc;
@@ -673,7 +673,11 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
INIT_LIST_HEAD(&f->remotes);
memcpy(f->eth_addr, mac, ETH_ALEN);
 
-   vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
+   rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
+   if (rc < 0) {
+   kfree(f);
+   return rc;
+   }
 
++vxlan->addrcnt;
hlist_add_head_rcu(&f->hlist,
-- 
1.8.3.1

[PATCH] openvswitch: add sanity check in queue_userspace_packet.

2016-11-28 Thread Haishuang Yan

The kernel will oops if genlmsg_put returns NULL,
so add a sanity check.

Signed-off-by: Haishuang Yan 
---
 net/openvswitch/datapath.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 2d4c4d3..ceb1b1e 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -474,6 +474,10 @@ static int queue_userspace_packet(struct datapath *dp, 
struct sk_buff *skb,
 
upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
 0, upcall_info->cmd);
+   if (!upcall) {
+   err = -EMSGSIZE;
+   goto out;
+   }
upcall->dp_ifindex = dp_ifindex;
 
err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
-- 
1.8.3.1

[PATCH] geneve: fix max_mtu setting

2016-06-25 Thread Haishuang Yan

For ipv6+udp+geneve encapsulation data, the max_mtu should subtract
sizeof(ipv6hdr), instead of sizeof(iphdr).

Signed-off-by: Haishuang Yan 
---
 drivers/net/geneve.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index aa61708..c676d23 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1036,12 +1036,17 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, 
struct net_device *dev)
 
 static int __geneve_change_mtu(struct net_device *dev, int new_mtu, bool 
strict)
 {
+   struct geneve_dev *geneve = netdev_priv(dev);
/* The max_mtu calculation does not take account of GENEVE
 * options, to avoid excluding potentially valid
 * configurations.
 */
-   int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - sizeof(struct iphdr)
-   - dev->hard_header_len;
+   int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len;
+
+   if (geneve->remote.sa.sa_family == AF_INET)
+   max_mtu -= sizeof(struct iphdr);
+   else
+   max_mtu -= sizeof(struct ipv6hdr);
 
if (new_mtu < 68)
return -EINVAL;
-- 
1.8.3.1

[PATCH] openvswitch: Use proper buffer size in nla_memcpy

2016-03-28 Thread Haishuang Yan

For the input parameter count, it's better to use the size
of the destination buffer, as nla_memcpy already takes into
account the length of the source netlink attribute when
data is copied from an attribute.

Signed-off-by: Haishuang Yan 
---
 net/openvswitch/conntrack.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index dc5eb29..f8a8d43 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -968,7 +968,8 @@ static int parse_nat(const struct nlattr *attr,
break;
 
case OVS_NAT_ATTR_IP_MIN:
-   nla_memcpy(&info->range.min_addr, a, nla_len(a));
+   nla_memcpy(&info->range.min_addr, a,
+  sizeof(info->range.min_addr));
info->range.flags |= NF_NAT_RANGE_MAP_IPS;
break;
 
-- 
1.8.3.1

[PATCH] bridge: Allow set bridge ageing time when switchdev disabled

2016-03-29 Thread Haishuang Yan

When NET_SWITCHDEV=n, switchdev_port_attr_set will return -EOPNOTSUPP;
we should ignore this error code and continue to set the ageing time.

Signed-off-by: Haishuang Yan 
---
 net/bridge/br_stp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index e234490..9cb7044 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -582,7 +582,7 @@ int br_set_ageing_time(struct net_bridge *br, u32 
ageing_time)
int err;
 
err = switchdev_port_attr_set(br->dev, &attr);
-   if (err)
+   if (err && err != -EOPNOTSUPP)
return err;
 
br->ageing_time = t;
-- 
1.8.3.1

[PATCH] gre: fix return value of gre_rcv

2016-03-22 Thread Haishuang Yan

Dropped skb's should be documented by an appropriate return value.
Use the correct NET_RX_DROP and NET_RX_SUCCESS values for that reason.

Signed-off-by: Haishuang Yan 
---
 net/ipv4/ip_gre.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 31936d3..1dc0cdb 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -432,12 +432,12 @@ static int gre_rcv(struct sk_buff *skb)
goto drop;
 
if (ipgre_rcv(skb, &tpi) == PACKET_RCVD)
-   return 0;
+   return NET_RX_SUCCESS;
 
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
 drop:
kfree_skb(skb);
-   return 0;
+   return NET_RX_DROP;
 }
 
 static __sum16 gre_checksum(struct sk_buff *skb)
-- 
1.8.3.1

[PATCH] vlan: propagate gso_min_segs

2016-03-22 Thread Haishuang Yan

vlan drivers lack proper propagation of gso_min_segs from lower device.

Signed-off-by: Haishuang Yan 
---
 drivers/net/ipvlan/ipvlan_main.c | 2 ++
 drivers/net/macvlan.c| 1 +
 net/8021q/vlan.c | 1 +
 net/8021q/vlan_dev.c | 1 +
 4 files changed, 5 insertions(+)

diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 57941d3..72a2517 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -120,6 +120,7 @@ static int ipvlan_init(struct net_device *dev)
dev->features |= NETIF_F_LLTX;
dev->gso_max_size = phy_dev->gso_max_size;
dev->gso_max_segs = phy_dev->gso_max_segs;
+   dev->gso_min_segs = phy_dev->gso_min_segs;
dev->hard_header_len = phy_dev->hard_header_len;
 
ipvlan_set_lockdep_class(dev);
@@ -594,6 +595,7 @@ static int ipvlan_device_event(struct notifier_block 
*unused,
ipvlan->dev->features = dev->features & IPVLAN_FEATURES;
ipvlan->dev->gso_max_size = dev->gso_max_size;
ipvlan->dev->gso_max_segs = dev->gso_max_segs;
+   ipvlan->dev->gso_min_segs = dev->gso_min_segs;
netdev_features_change(ipvlan->dev);
}
break;
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 2bcf1f3..72991e9 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -1534,6 +1534,7 @@ static int macvlan_device_event(struct notifier_block 
*unused,
list_for_each_entry(vlan, &port->vlans, list) {
vlan->dev->gso_max_size = dev->gso_max_size;
vlan->dev->gso_max_segs = dev->gso_max_segs;
+   vlan->dev->gso_min_segs = dev->gso_min_segs;
netdev_update_features(vlan->dev);
}
break;
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index a1e273a..01a4de1 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -312,6 +312,7 @@ static void vlan_transfer_features(struct net_device *dev,
 
vlandev->gso_max_size = dev->gso_max_size;
vlandev->gso_max_segs = dev->gso_max_segs;
+   vlandev->gso_min_segs = dev->gso_min_segs;
 
if (vlan_hw_offload_capable(dev->features, vlan->vlan_proto))
vlandev->hard_header_len = dev->hard_header_len;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index e7e6257..752263d 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -552,6 +552,7 @@ static int vlan_dev_init(struct net_device *dev)
 NETIF_F_GSO_SOFTWARE;
dev->gso_max_size = real_dev->gso_max_size;
dev->gso_max_segs = real_dev->gso_max_segs;
+   dev->gso_min_segs = real_dev->gso_min_segs;
if (dev->features & NETIF_F_VLAN_FEATURES)
netdev_warn(real_dev, "VLAN features are set incorrectly.  
Q-in-Q configurations may not work correctly.\n");
 
-- 
1.8.3.1

[PATCH] net: ping: make ping_v6_sendmsg static

2016-03-23 Thread Haishuang Yan

As ping_v6_sendmsg is used only in this file,
make it static.

The bodies of "pingv6_prot" and "pingv6_protosw" were
moved to the middle of the file to avoid having to
declare static prototypes.

Signed-off-by: Haishuang Yan 
---
 include/net/ping.h |  1 -
 net/ipv6/ping.c| 59 +++---
 2 files changed, 29 insertions(+), 31 deletions(-)

diff --git a/include/net/ping.h b/include/net/ping.h
index 5fd7cc2..4cd90d6 100644
--- a/include/net/ping.h
+++ b/include/net/ping.h
@@ -79,7 +79,6 @@ int  ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t 
len, int noblock,
  int flags, int *addr_len);
 int  ping_common_sendmsg(int family, struct msghdr *msg, size_t len,
 void *user_icmph, size_t icmph_len);
-int  ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
 int  ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
 bool ping_rcv(struct sk_buff *skb);
 
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 263a516..c382db7 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -26,35 +26,6 @@
 #include 
 #include 
 
-struct proto pingv6_prot = {
-   .name = "PINGv6",
-   .owner =THIS_MODULE,
-   .init = ping_init_sock,
-   .close =ping_close,
-   .connect =  ip6_datagram_connect_v6_only,
-   .disconnect =   udp_disconnect,
-   .setsockopt =   ipv6_setsockopt,
-   .getsockopt =   ipv6_getsockopt,
-   .sendmsg =  ping_v6_sendmsg,
-   .recvmsg =  ping_recvmsg,
-   .bind = ping_bind,
-   .backlog_rcv =  ping_queue_rcv_skb,
-   .hash = ping_hash,
-   .unhash =   ping_unhash,
-   .get_port = ping_get_port,
-   .obj_size = sizeof(struct raw6_sock),
-};
-EXPORT_SYMBOL_GPL(pingv6_prot);
-
-static struct inet_protosw pingv6_protosw = {
-   .type =  SOCK_DGRAM,
-   .protocol =  IPPROTO_ICMPV6,
-   .prot =  &pingv6_prot,
-   .ops =   &inet6_dgram_ops,
-   .flags = INET_PROTOSW_REUSE,
-};
-
-
 /* Compatibility glue so we can support IPv6 when it's compiled as a module */
 static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len,
 int *addr_len)
@@ -77,7 +48,7 @@ static int dummy_ipv6_chk_addr(struct net *net, const struct 
in6_addr *addr,
return 0;
 }
 
-int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 {
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -192,6 +163,34 @@ int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, 
size_t len)
return len;
 }
 
+struct proto pingv6_prot = {
+   .name = "PINGv6",
+   .owner =THIS_MODULE,
+   .init = ping_init_sock,
+   .close =ping_close,
+   .connect =  ip6_datagram_connect_v6_only,
+   .disconnect =   udp_disconnect,
+   .setsockopt =   ipv6_setsockopt,
+   .getsockopt =   ipv6_getsockopt,
+   .sendmsg =  ping_v6_sendmsg,
+   .recvmsg =  ping_recvmsg,
+   .bind = ping_bind,
+   .backlog_rcv =  ping_queue_rcv_skb,
+   .hash = ping_hash,
+   .unhash =   ping_unhash,
+   .get_port = ping_get_port,
+   .obj_size = sizeof(struct raw6_sock),
+};
+EXPORT_SYMBOL_GPL(pingv6_prot);
+
+static struct inet_protosw pingv6_protosw = {
+   .type =  SOCK_DGRAM,
+   .protocol =  IPPROTO_ICMPV6,
+   .prot =  &pingv6_prot,
+   .ops =   &inet6_dgram_ops,
+   .flags = INET_PROTOSW_REUSE,
+};
+
 #ifdef CONFIG_PROC_FS
 static void *ping_v6_seq_start(struct seq_file *seq, loff_t *pos)
 {
-- 
1.8.3.1

Re: [PATCH v2,net-next] ip6_gre: fix a potential issue in ip6erspan_rcv

2017-12-19 Thread Haishuang Yan



> On 2017年12月19日, at 下午11:34, David Miller  wrote:
> 
> From: Haishuang Yan 
> Date: Sat, 16 Dec 2017 10:25:25 +0800
> 
>> pskb_may_pull() can change skb->data, so we need to load ipv6h/ershdr at
>> the right place.
>> 
>> Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support")
>> Acked-by: William Tu 
>> Cc: William Tu 
>> Signed-off-by: Haishuang Yan 
> 
> This patch does not apply:
> 
>> +ipv6h = ipv6_hdr(skb);
>> +ershdr = (struct erspan_base_hdr *)skb->data;
>>  ver = (ntohs(ershdr->ver_vlan) & VER_MASK) >> VER_OFFSET;
>>  tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK);
>>  pkt_md = (struct erspan_metadata *)(ershdr + 1);
> 
> There is not "pkt_md = ..." assignment in net-next on this line.
> 

Okay, I will fix it and resubmit another commit, thanks.

Re: [PATCH v2,net-next 1/2] ip_gre: fix potential memory leak in erspan_rcv

2017-12-19 Thread Haishuang Yan



> On 2017年12月19日, at 下午11:36, David Miller  wrote:
> 
> From: Haishuang Yan 
> Date: Sat, 16 Dec 2017 10:48:38 +0800
> 
>> If md is NULL, tun_dst must be freed, otherwise it will cause memory
>> leak.
>> 
>> Fixes: 1a66a836da6 ("gre: add collect_md mode to ERSPAN tunnel")
>> Cc: William Tu 
>> Signed-off-by: Haishuang Yan 
>> 
>> Change since v2:
>>  * Rebase on latest master branch.
>>  * Correct wrong fix information.
> 
> Please do not put a changelog after the fixes and signoff tags, those tags 
> must
> appear last in the commit message.
> 
> Thank you.
> 

Okay, I will resubmit another commit, thanks.

[PATCH v3,net-next] ip6_gre: fix a potential issue in ip6erspan_rcv

2017-12-19 Thread Haishuang Yan

pskb_may_pull() can change skb->data, so we need to load ipv6h/ershdr at
the right place.

Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support")
Cc: William Tu 
Acked-by: William Tu 
Signed-off-by: Haishuang Yan 

---
Change since v3:
  * Rebase on latest master branch.
  * Fix wrong commit information.
---
 net/ipv6/ip6_gre.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 87b9892..9bd1103 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -507,12 +507,11 @@ static int ip6erspan_rcv(struct sk_buff *skb, int 
gre_hdr_len,
struct ip6_tnl *tunnel;
u8 ver;
 
-   ipv6h = ipv6_hdr(skb);
-   ershdr = (struct erspan_base_hdr *)skb->data;
-
if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr
return PACKET_REJECT;
 
+   ipv6h = ipv6_hdr(skb);
+   ershdr = (struct erspan_base_hdr *)skb->data;
ver = (ntohs(ershdr->ver_vlan) & VER_MASK) >> VER_OFFSET;
tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK);
 
-- 
1.8.3.1

[PATCH v3,net-next 1/2] ip_gre: fix potential memory leak in erspan_rcv

2017-12-19 Thread Haishuang Yan

If md is NULL, tun_dst must be freed, otherwise it will cause memory
leak.

Fixes: 1a66a836da6 ("gre: add collect_md mode to ERSPAN tunnel")
Cc: William Tu 
Signed-off-by: Haishuang Yan 

---
Changes since v3:
  * Rebase on latest master branch.
  * Fix wrong commit information.
---
 net/ipv4/ip_gre.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index fd4d6e9..3029e3e 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -313,8 +313,10 @@ static int erspan_rcv(struct sk_buff *skb, struct 
tnl_ptk_info *tpi,
return PACKET_REJECT;
 
md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
-   if (!md)
+   if (!md) {
+   dst_release((struct dst_entry *)tun_dst);
return PACKET_REJECT;
+   }
 
memcpy(md, pkt_md, sizeof(*md));
md->version = ver;
-- 
1.8.3.1

[PATCH v3,net-next 0/2] net: erspan: fix potential memory leak

2017-12-19 Thread Haishuang Yan

This patch series fixes a potential memory leak issue.

Haishuang Yan (2):
  ip_gre: fix potential memory leak in erspan_rcv
  ip6_gre: fix potential memory leak in ip6erspan_rcv

 net/ipv4/ip_gre.c  | 4 +++-
 net/ipv6/ip6_gre.c | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

--
1.8.3.1

[PATCH v3,net-next 2/2] ip6_gre: fix potential memory leak in ip6erspan_rcv

2017-12-19 Thread Haishuang Yan

If md is NULL, tun_dst must be freed, otherwise it will cause memory
leak.

Fixes: ef7baf5e083c ("ip6_gre: add ip6 erspan collect_md mode")
Cc: William Tu 
Signed-off-by: Haishuang Yan 

---
Changes since v3:
  * Rebase on latest master branch.
  * Fix wrong commit information.
---
 net/ipv6/ip6_gre.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 9bd1103..45038a9 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -550,8 +550,10 @@ static int ip6erspan_rcv(struct sk_buff *skb, int 
gre_hdr_len,
 
info = &tun_dst->u.tun_info;
md = ip_tunnel_info_opts(info);
-   if (!md)
+   if (!md) {
+   dst_release((struct dst_entry *)tun_dst);
return PACKET_REJECT;
+   }
 
memcpy(md, pkt_md, sizeof(*md));
md->version = ver;
-- 
1.8.3.1

[PATCH v3,net-next 0/2] net: erspan: fix erspan_rcv/ip6erspan_rcv error path

2017-12-19 Thread Haishuang Yan

This patch series fixes a potential issue in the error path.

Haishuang Yan (2):
  ip_gre: fix error path when erspan_rcv failed
  ip6_gre: fix error path when ip6erspan_rcv failed

 net/ipv4/ip_gre.c  | 2 ++
 net/ipv6/ip6_gre.c | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

-- 
1.8.3.1

[PATCH v3,net-next 1/2] ip_gre: fix error path when erspan_rcv failed

2017-12-19 Thread Haishuang Yan

When erspan_rcv() returns PACKET_REJECT, we shouldn't call ipgre_rcv() to
process the packet again; instead, send an ICMP unreachable message in the
error path.

Fixes: 84e54fe0a5ea ("gre: introduce native tunnel support for ERSPAN")
Acked-by: William Tu 
Cc: William Tu 
Signed-off-by: Haishuang Yan 

---
Change since v3:
  * Rebase on latest master branch.
  * Fix wrong commit information.
---
 net/ipv4/ip_gre.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 3029e3e..90c9123 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -436,11 +436,13 @@ static int gre_rcv(struct sk_buff *skb)
 tpi.proto == htons(ETH_P_ERSPAN2))) {
if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
return 0;
+   goto out;
}
 
if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
return 0;
 
+out:
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
 drop:
kfree_skb(skb);
-- 
1.8.3.1

[PATCH v3,net-next 2/2] ip6_gre: fix error path when ip6erspan_rcv failed

2017-12-19 Thread Haishuang Yan

Same as the IPv4 code: when ip6erspan_rcv() returns PACKET_REJECT, we
should call icmpv6_send() to send an ICMP unreachable message in the
error path.

Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support")
Acked-by: William Tu 
Cc: William Tu 
Signed-off-by: Haishuang Yan 

---
Change since v2:
  * Rebase on latest master branch.
  * Fix wrong commit information.
---
 net/ipv6/ip6_gre.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 45038a9..8451d00 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -604,12 +604,13 @@ static int gre_rcv(struct sk_buff *skb)
 tpi.proto == htons(ETH_P_ERSPAN2))) {
if (ip6erspan_rcv(skb, hdr_len, &tpi) == PACKET_RCVD)
return 0;
-   goto drop;
+   goto out;
}
 
if (ip6gre_rcv(skb, &tpi) == PACKET_RCVD)
return 0;
 
+out:
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
 drop:
kfree_skb(skb);
-- 
1.8.3.1

[PATCH 1/2] ip_gre: fix potential memory leak in erspan_rcv

2017-12-14 Thread Haishuang Yan

If md is NULL, tun_dst must be freed, otherwise it will cause memory
leak.

Fixes: 84e54fe0a5ea ("gre: introduce native tunnel support for ERSPAN")
Cc: William Tu 
Signed-off-by: Haishuang Yan 
---
 net/ipv4/ip_gre.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index d828821..9253d6f 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -304,8 +304,10 @@ static int erspan_rcv(struct sk_buff *skb, struct 
tnl_ptk_info *tpi,
return PACKET_REJECT;
 
md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
-   if (!md)
+   if (!md) {
+   dst_release((struct dst_entry *)tun_dst);
return PACKET_REJECT;
+   }
 
md->index = index;
info = &tun_dst->u.tun_info;
-- 
1.8.3.1

[PATCH 2/2] ip6_gre: fix potential memory leak in ip6erspan_rcv

2017-12-14 Thread Haishuang Yan

If md is NULL, tun_dst must be freed, otherwise it will cause memory
leak.

Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support")
Cc: William Tu 
Signed-off-by: Haishuang Yan 
---
 net/ipv6/ip6_gre.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 4562579..b8b0e4b 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -542,8 +542,10 @@ static int ip6erspan_rcv(struct sk_buff *skb, int 
gre_hdr_len,
 
info = &tun_dst->u.tun_info;
md = ip_tunnel_info_opts(info);
-   if (!md)
+   if (!md) {
+   dst_release((struct dst_entry *)tun_dst);
return PACKET_REJECT;
+   }
 
md->index = index;
info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
-- 
1.8.3.1

[PATCH 1/2] ip_gre: fix error path when erspan_rcv failed

2017-12-14 Thread Haishuang Yan

When erspan_rcv() returns PACKET_REJECT, we shouldn't call ipgre_rcv() to
process the packet again; instead, send an ICMP unreachable message in the
error path.

Fixes: 84e54fe0a5ea ("gre: introduce native tunnel support for ERSPAN")
Cc: William Tu 
Signed-off-by: Haishuang Yan 
---
 net/ipv4/ip_gre.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 9253d6f..61ee014 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -411,11 +411,13 @@ static int gre_rcv(struct sk_buff *skb)
if (unlikely(tpi.proto == htons(ETH_P_ERSPAN))) {
if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
return 0;
+   goto out;
}
 
if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
return 0;
 
+out:
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
 drop:
kfree_skb(skb);
-- 
1.8.3.1

[PATCH 2/2] ip6_gre: fix error path when ip6erspan_rcv failed

2017-12-14 Thread Haishuang Yan

Same as the IPv4 code: when ip6erspan_rcv() returns PACKET_REJECT, we
should call icmpv6_send() to send an ICMP unreachable message in the
error path.

Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support")
Cc: William Tu 
Signed-off-by: Haishuang Yan 
---
 net/ipv6/ip6_gre.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index b8b0e4b..68e7eef 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -580,12 +580,13 @@ static int gre_rcv(struct sk_buff *skb)
if (unlikely(tpi.proto == htons(ETH_P_ERSPAN))) {
if (ip6erspan_rcv(skb, hdr_len, &tpi) == PACKET_RCVD)
return 0;
-   goto drop;
+   goto out;
}
 
if (ip6gre_rcv(skb, &tpi) == PACKET_RCVD)
return 0;
 
+out:
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
 drop:
kfree_skb(skb);
-- 
1.8.3.1

[PATCH] ip_gre: fix wrong return value of erspan_rcv

2017-12-14 Thread Haishuang Yan

If pskb_may_pull() fails, return PACKET_REJECT instead of -ENOMEM.

Fixes: 84e54fe0a5ea ("gre: introduce native tunnel support for ERSPAN")
Cc: William Tu 
Signed-off-by: Haishuang Yan 
---
 net/ipv4/ip_gre.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 61ee014..d747d06 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -267,7 +267,7 @@ static int erspan_rcv(struct sk_buff *skb, struct 
tnl_ptk_info *tpi,
len = gre_hdr_len + sizeof(*ershdr);
 
if (unlikely(!pskb_may_pull(skb, len)))
-   return -ENOMEM;
+   return PACKET_REJECT;
 
iph = ip_hdr(skb);
ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len);
-- 
1.8.3.1

[PATCH] ip6_gre: fix a potential issue in ip6erspan_rcv

2017-12-14 Thread Haishuang Yan

pskb_may_pull() can change skb->data, so we need to load ipv6h/ershdr at
the right place.

Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support")
Cc: William Tu 
Signed-off-by: Haishuang Yan 
---
 net/ipv6/ip6_gre.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 68e7eef..eab4b56 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -506,12 +506,12 @@ static int ip6erspan_rcv(struct sk_buff *skb, int 
gre_hdr_len,
struct ip6_tnl *tunnel;
__be32 index;
 
-   ipv6h = ipv6_hdr(skb);
-   ershdr = (struct erspanhdr *)skb->data;
-
if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr
return PACKET_REJECT;
 
+   ipv6h = ipv6_hdr(skb);
+   ershdr = (struct erspanhdr *)skb->data;
+
tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK);
index = ershdr->md.index;
 
-- 
1.8.3.1

[PATCH v2,net-next] ip6_gre: fix a potential issue in ip6erspan_rcv

2017-12-15 Thread Haishuang Yan

pskb_may_pull() can change skb->data, so we need to load ipv6h/ershdr at
the right place.

Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support")
Acked-by: William Tu 
Cc: William Tu 
Signed-off-by: Haishuang Yan 

---
Change since v2:
  * Rebase on latest master.
---
 net/ipv6/ip6_gre.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index f210f9c..aa1512e 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -507,12 +507,11 @@ static int ip6erspan_rcv(struct sk_buff *skb, int 
gre_hdr_len,
struct ip6_tnl *tunnel;
u8 ver;
 
-   ipv6h = ipv6_hdr(skb);
-   ershdr = (struct erspan_base_hdr *)skb->data;
-
if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr
return PACKET_REJECT;
 
+   ipv6h = ipv6_hdr(skb);
+   ershdr = (struct erspan_base_hdr *)skb->data;
ver = (ntohs(ershdr->ver_vlan) & VER_MASK) >> VER_OFFSET;
tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK);
pkt_md = (struct erspan_metadata *)(ershdr + 1);
-- 
1.8.3.1

[PATCH v2,net-next 1/2] ip_gre: fix potential memory leak in erspan_rcv

2017-12-15 Thread Haishuang Yan

If md is NULL, tun_dst must be freed, otherwise it will cause memory
leak.

Fixes: 1a66a836da6 ("gre: add collect_md mode to ERSPAN tunnel")
Cc: William Tu 
Signed-off-by: Haishuang Yan 

Change since v2:
  * Rebase on latest master branch.
  * Correct wrong fix information.
---
 net/ipv4/ip_gre.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 004800b..33af55a 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -311,8 +311,10 @@ static int erspan_rcv(struct sk_buff *skb, struct 
tnl_ptk_info *tpi,
return PACKET_REJECT;
 
md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
-   if (!md)
+   if (!md) {
+   dst_release((struct dst_entry *)tun_dst);
return PACKET_REJECT;
+   }
 
memcpy(md, pkt_md, sizeof(*md));
md->version = ver;
-- 
1.8.3.1

[PATCH v2,net-next 2/2] ip6_gre: fix potential memory leak in ip6erspan_rcv

2017-12-15 Thread Haishuang Yan

If md is NULL, tun_dst must be freed, otherwise it will cause memory
leak.

Fixes: ef7baf5e083c ("ip6_gre: add ip6 erspan collect_md mode")
Cc: William Tu 
Signed-off-by: Haishuang Yan 

Change since v2:
  * Rebase on latest master branch.
  * Correct wrong fix information.
---
 net/ipv6/ip6_gre.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 5c9c65f..8ce9d42 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -549,8 +549,10 @@ static int ip6erspan_rcv(struct sk_buff *skb, int 
gre_hdr_len,
 
info = &tun_dst->u.tun_info;
md = ip_tunnel_info_opts(info);
-   if (!md)
+   if (!md) {
+   dst_release((struct dst_entry *)tun_dst);
return PACKET_REJECT;
+   }
 
memcpy(md, pkt_md, sizeof(*md));
md->version = ver;
-- 
1.8.3.1

[PATCH v2,net-next 2/2] ip6_gre: fix error path when ip6erspan_rcv failed

2017-12-15 Thread Haishuang Yan

Same as the IPv4 code: when ip6erspan_rcv() returns PACKET_REJECT, we
should call icmpv6_send() to send an ICMP unreachable message in the
error path.

Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support")
Acked-by: William Tu 
Cc: William Tu 
Signed-off-by: Haishuang Yan 

Change since v2:
  * Rebase on latest master branch.
---
 net/ipv6/ip6_gre.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 8ce9d42..f210f9c 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -603,12 +603,13 @@ static int gre_rcv(struct sk_buff *skb)
 tpi.proto == htons(ETH_P_ERSPAN2))) {
if (ip6erspan_rcv(skb, hdr_len, &tpi) == PACKET_RCVD)
return 0;
-   goto drop;
+   goto out;
}
 
if (ip6gre_rcv(skb, &tpi) == PACKET_RCVD)
return 0;
 
+out:
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
 drop:
kfree_skb(skb);
-- 
1.8.3.1

[PATCH v2,net-next 1/2] ip_gre: fix error path when erspan_rcv failed

2017-12-15 Thread Haishuang Yan

When erspan_rcv() returns PACKET_REJECT, we shouldn't call ipgre_rcv() to
process the packet again; instead, send an ICMP unreachable message in the
error path.

Fixes: 84e54fe0a5ea ("gre: introduce native tunnel support for ERSPAN")
Acked-by: William Tu 
Cc: William Tu 
Signed-off-by: Haishuang Yan 

Change since v2:
  * Rebase on latest master branch.
---
 net/ipv4/ip_gre.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 33af55a..ccfc5bc 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -434,11 +434,13 @@ static int gre_rcv(struct sk_buff *skb)
 tpi.proto == htons(ETH_P_ERSPAN2))) {
if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
return 0;
+   goto out;
}
 
if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
return 0;
 
+out:
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
 drop:
kfree_skb(skb);
-- 
1.8.3.1

[PATCH net-next 2/2] geneve: speedup geneve tunnels dismantle

2017-12-16 Thread Haishuang Yan

Since we now hold the RTNL lock in geneve_exit_net, it's better to batch
them to speed up geneve tunnel dismantle.

Signed-off-by: Haishuang Yan 
---
 drivers/net/geneve.c | 24 
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index b718a02..667c44f 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1638,19 +1638,16 @@ static __net_init int geneve_init_net(struct net *net)
return 0;
 }
 
-static void __net_exit geneve_exit_net(struct net *net)
+static void geneve_destroy_tunnels(struct net *net, struct list_head *head)
 {
struct geneve_net *gn = net_generic(net, geneve_net_id);
struct geneve_dev *geneve, *next;
struct net_device *dev, *aux;
-   LIST_HEAD(list);
-
-   rtnl_lock();
 
/* gather any geneve devices that were moved into this ns */
for_each_netdev_safe(net, dev, aux)
if (dev->rtnl_link_ops == &geneve_link_ops)
-   unregister_netdevice_queue(dev, &list);
+   unregister_netdevice_queue(dev, head);
 
/* now gather any other geneve devices that were created in this ns */
list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) {
@@ -1658,18 +1655,29 @@ static void __net_exit geneve_exit_net(struct net *net)
 * to the list by the previous loop.
 */
if (!net_eq(dev_net(geneve->dev), net))
-   unregister_netdevice_queue(geneve->dev, &list);
+   unregister_netdevice_queue(geneve->dev, head);
}
 
+   WARN_ON_ONCE(!list_empty(&gn->sock_list));
+}
+
+static void __net_exit geneve_exit_batch_net(struct list_head *net_list)
+{
+   struct net *net;
+   LIST_HEAD(list);
+
+   rtnl_lock();
+   list_for_each_entry(net, net_list, exit_list)
+   geneve_destroy_tunnels(net, &list);
+
/* unregister the devices gathered above */
unregister_netdevice_many(&list);
rtnl_unlock();
-   WARN_ON_ONCE(!list_empty(&gn->sock_list));
 }
 
 static struct pernet_operations geneve_net_ops = {
.init = geneve_init_net,
-   .exit = geneve_exit_net,
+   .exit_batch = geneve_exit_batch_net,
.id   = &geneve_net_id,
.size = sizeof(struct geneve_net),
 };
-- 
1.8.3.1

[PATCH net-next 0/2] net: speedup geneve/vxlan tunnels dismantle

2017-12-16 Thread Haishuang Yan

This patch series adds batching to vxlan/geneve tunnels so that netns
dismantles are less costly.

Haishuang Yan (2):
  vxlan: speedup vxlan tunnels dismantle
  geneve: speedup geneve tunnels dismantle

 drivers/net/geneve.c | 24 
 drivers/net/vxlan.c  | 26 +-
 2 files changed, 33 insertions(+), 17 deletions(-)

--
1.8.3.1

[PATCH net-next 1/2] vxlan: speedup vxlan tunnels dismantle

2017-12-16 Thread Haishuang Yan

Since we now hold the RTNL lock in vxlan_exit_net, it's better to batch
them to speed up vxlan tunnel dismantle.

Signed-off-by: Haishuang Yan 
---
 drivers/net/vxlan.c | 26 +-
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 19b9cc5..48a0dc2 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -3692,18 +3692,16 @@ static __net_init int vxlan_init_net(struct net *net)
return 0;
 }
 
-static void __net_exit vxlan_exit_net(struct net *net)
+static void vxlan_destroy_tunnels(struct net *net, struct list_head *head)
 {
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_dev *vxlan, *next;
struct net_device *dev, *aux;
unsigned int h;
-   LIST_HEAD(list);
 
-   rtnl_lock();
for_each_netdev_safe(net, dev, aux)
if (dev->rtnl_link_ops == &vxlan_link_ops)
-   unregister_netdevice_queue(dev, &list);
+   unregister_netdevice_queue(dev, head);
 
list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next) {
/* If vxlan->dev is in the same netns, it has already been added
@@ -3711,20 +3709,30 @@ static void __net_exit vxlan_exit_net(struct net *net)
 */
if (!net_eq(dev_net(vxlan->dev), net)) {
gro_cells_destroy(&vxlan->gro_cells);
-   unregister_netdevice_queue(vxlan->dev, &list);
+   unregister_netdevice_queue(vxlan->dev, head);
}
}
 
-   unregister_netdevice_many(&list);
-   rtnl_unlock();
-
for (h = 0; h < PORT_HASH_SIZE; ++h)
WARN_ON_ONCE(!hlist_empty(&vn->sock_list[h]));
 }
 
+static void __net_exit vxlan_exit_batch_net(struct list_head *net_list)
+{
+   struct net *net;
+   LIST_HEAD(list);
+
+   rtnl_lock();
+   list_for_each_entry(net, net_list, exit_list)
+   vxlan_destroy_tunnels(net, &list);
+
+   unregister_netdevice_many(&list);
+   rtnl_unlock();
+}
+
 static struct pernet_operations vxlan_net_ops = {
.init = vxlan_init_net,
-   .exit = vxlan_exit_net,
+   .exit_batch = vxlan_exit_batch_net,
.id   = &vxlan_net_id,
.size = sizeof(struct vxlan_net),
 };
-- 
1.8.3.1

[PATCH v4 1/2] ip_tunnel: fix ip tunnel lookup in collect_md mode

2017-09-12 Thread Haishuang Yan

In collect_md mode, if the tunnel device is down, ip_tunnel_rcv can
still be called to receive packets, and the RX statistics increase
improperly.

When the md tunnel is down, it's not necessary to increase RX drops
for the tunnel device; packets would be received on the fallback tunnel,
and the RX drops on the fallback device will be increased as expected.

Fixes: 2e15ea390e6f ("ip_gre: Add support to collect tunnel metadata.")
Cc: Pravin B Shelar 
Signed-off-by: Haishuang Yan 

---
Change since v4:
  * Make the commit message clearer.
  * Fix wrong recipient address.
---
 net/ipv4/ip_tunnel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index e1856bf..e9805ad 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -176,7 +176,7 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net 
*itn,
return cand;
 
t = rcu_dereference(itn->collect_md_tun);
-   if (t)
+   if (t && t->dev->flags & IFF_UP)
return t;
 
if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
-- 
1.8.3.1

[PATCH v4 2/2] ip6_tunnel: fix ip6 tunnel lookup in collect_md mode

2017-09-12 Thread Haishuang Yan

In collect_md mode, if the tunnel device is down, __ip6_tnl_rcv can
still be called to receive packets, and the RX statistics increase
improperly.

When the md tunnel is down, it's not necessary to increase RX drops
for the tunnel device; packets would be received on the fallback tunnel,
and the RX drops on the fallback device will be increased as expected.

Fixes: 8d79266bc48c ("ip6_tunnel: add collect_md mode to IPv6 tunnels")
Cc: Alexei Starovoitov 
Signed-off-by: Haishuang Yan 

---
Change since v4:
  * Make the commit message clearer
  * Fix wrong recipient address
---
 net/ipv6/ip6_tunnel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 10a693a..ae73164 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -171,7 +171,7 @@ static struct net_device_stats *ip6_get_stats(struct 
net_device *dev)
}
 
t = rcu_dereference(ip6n->collect_md_tun);
-   if (t)
+   if (t && t->dev->flags & IFF_UP)
return t;
 
t = rcu_dereference(ip6n->tnls_wc[0]);
-- 
1.8.3.1

[PATCH] ipv4: Namespaceify tcp_fastopen knob

2017-09-12 Thread Haishuang Yan

Applications in different namespaces might need to enable the TCP Fast
Open feature independently of the host.

Reported-by: Luca BRUNO 
Signed-off-by: Haishuang Yan 
---
 include/net/netns/ipv4.h   |  2 ++
 include/net/tcp.h  |  1 -
 net/ipv4/af_inet.c |  7 ---
 net/ipv4/sysctl_net_ipv4.c | 42 +-
 net/ipv4/tcp.c |  4 ++--
 net/ipv4/tcp_fastopen.c| 13 ++---
 net/ipv4/tcp_ipv4.c|  2 ++
 samples/bpf/test_ipip.sh   |  2 ++
 8 files changed, 39 insertions(+), 34 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 305e031..ea0953b 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -128,6 +128,8 @@ struct netns_ipv4 {
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_max_orphans;
+   int sysctl_tcp_fastopen;
+   unsigned int sysctl_tcp_fastopen_blackhole_timeout;
 
 #ifdef CONFIG_NET_L3_MASTER_DEV
int sysctl_udp_l3mdev_accept;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index ac2d998..e4cc0dd 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -240,7 +240,6 @@
 
 
 /* sysctl variables for tcp */
-extern int sysctl_tcp_fastopen;
 extern int sysctl_tcp_retrans_collapse;
 extern int sysctl_tcp_stdurg;
 extern int sysctl_tcp_rfc1337;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e31108e..309b849 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -195,7 +195,7 @@ int inet_listen(struct socket *sock, int backlog)
 {
struct sock *sk = sock->sk;
unsigned char old_state;
-   int err;
+   int err, tcp_fastopen;
 
lock_sock(sk);
 
@@ -217,8 +217,9 @@ int inet_listen(struct socket *sock, int backlog)
 * because the socket was in TCP_LISTEN state previously but
 * was shutdown() rather than close().
 */
-   if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
-   (sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
+   tcp_fastopen =  sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+   if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
+   (tcp_fastopen & TFO_SERVER_ENABLE) &&
!inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
fastopen_queue_tune(sk, backlog);
tcp_fastopen_init_key_once(true);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 4f26c8d3..30ebeb9 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -394,27 +394,6 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
.proc_handler   = proc_dointvec
},
{
-   .procname   = "tcp_fastopen",
-   .data   = &sysctl_tcp_fastopen,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec,
-   },
-   {
-   .procname   = "tcp_fastopen_key",
-   .mode   = 0600,
-   .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
-   .proc_handler   = proc_tcp_fastopen_key,
-   },
-   {
-   .procname   = "tcp_fastopen_blackhole_timeout_sec",
-   .data   = &sysctl_tcp_fastopen_blackhole_timeout,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_tfo_blackhole_detect_timeout,
-   .extra1 = &zero,
-   },
-   {
.procname   = "tcp_abort_on_overflow",
.data   = &sysctl_tcp_abort_on_overflow,
.maxlen = sizeof(int),
@@ -1085,6 +1064,27 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
.mode   = 0644,
.proc_handler   = proc_dointvec
},
+   {
+   .procname   = "tcp_fastopen",
+   .data   = &init_net.ipv4.sysctl_tcp_fastopen,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec,
+   },
+   {
+   .procname   = "tcp_fastopen_key",
+   .mode   = 0600,
+   .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
+   .proc_handler   = proc_tcp_fastopen_key,
+   },
+   {
+   .procname   = "tcp_fastopen_blackhole_timeout_sec",
+   .data   = 
&init_net.ipv4.sysctl_tcp_fastopen_blackhole_timeout,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_tfo_blackhole_detect_timeout,
+

[PATCH v2] ipv4: Namespaceify tcp_fastopen knob

2017-09-13 Thread Haishuang Yan

Applications in different namespaces might need to enable the TCP Fast
Open feature independently of the host.

Reported-by: Luca BRUNO 
Signed-off-by: Haishuang Yan 

---
Change since v2:
  * Remove an unrelated change that was included by mistake
---
 include/net/netns/ipv4.h   |  2 ++
 include/net/tcp.h  |  1 -
 net/ipv4/af_inet.c |  7 ---
 net/ipv4/sysctl_net_ipv4.c | 42 +-
 net/ipv4/tcp.c |  4 ++--
 net/ipv4/tcp_fastopen.c| 13 ++---
 net/ipv4/tcp_ipv4.c|  2 ++
 7 files changed, 37 insertions(+), 34 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 305e031..ea0953b 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -128,6 +128,8 @@ struct netns_ipv4 {
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_max_orphans;
+   int sysctl_tcp_fastopen;
+   unsigned int sysctl_tcp_fastopen_blackhole_timeout;
 
 #ifdef CONFIG_NET_L3_MASTER_DEV
int sysctl_udp_l3mdev_accept;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index ac2d998..e4cc0dd 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -240,7 +240,6 @@
 
 
 /* sysctl variables for tcp */
-extern int sysctl_tcp_fastopen;
 extern int sysctl_tcp_retrans_collapse;
 extern int sysctl_tcp_stdurg;
 extern int sysctl_tcp_rfc1337;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e31108e..309b849 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -195,7 +195,7 @@ int inet_listen(struct socket *sock, int backlog)
 {
struct sock *sk = sock->sk;
unsigned char old_state;
-   int err;
+   int err, tcp_fastopen;
 
lock_sock(sk);
 
@@ -217,8 +217,9 @@ int inet_listen(struct socket *sock, int backlog)
 * because the socket was in TCP_LISTEN state previously but
 * was shutdown() rather than close().
 */
-   if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
-   (sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
+   tcp_fastopen =  sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+   if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
+   (tcp_fastopen & TFO_SERVER_ENABLE) &&
!inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
fastopen_queue_tune(sk, backlog);
tcp_fastopen_init_key_once(true);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 4f26c8d3..30ebeb9 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -394,27 +394,6 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
.proc_handler   = proc_dointvec
},
{
-   .procname   = "tcp_fastopen",
-   .data   = &sysctl_tcp_fastopen,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec,
-   },
-   {
-   .procname   = "tcp_fastopen_key",
-   .mode   = 0600,
-   .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
-   .proc_handler   = proc_tcp_fastopen_key,
-   },
-   {
-   .procname   = "tcp_fastopen_blackhole_timeout_sec",
-   .data   = &sysctl_tcp_fastopen_blackhole_timeout,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_tfo_blackhole_detect_timeout,
-   .extra1 = &zero,
-   },
-   {
.procname   = "tcp_abort_on_overflow",
.data   = &sysctl_tcp_abort_on_overflow,
.maxlen = sizeof(int),
@@ -1085,6 +1064,27 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
.mode   = 0644,
.proc_handler   = proc_dointvec
},
+   {
+   .procname   = "tcp_fastopen",
+   .data   = &init_net.ipv4.sysctl_tcp_fastopen,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec,
+   },
+   {
+   .procname   = "tcp_fastopen_key",
+   .mode   = 0600,
+   .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
+   .proc_handler   = proc_tcp_fastopen_key,
+   },
+   {
+   .procname   = "tcp_fastopen_blackhole_timeout_sec",
+   .data   = 
&init_net.ipv4.sysctl_tcp_fastopen_blackhole_timeout,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_

[PATCH] be2net: Fix some u16 fields appropriately

2017-08-27 Thread Haishuang Yan

In be_tx_compl_process, frag_index is declared as u32, so it's better to
declare last_index as u32 as well.

CC: Ajit Khaparde 
Fixes: b0fd2eb28bd4 ("be2net: Declare some u16 fields as u32 to improve performance")
Signed-off-by: Haishuang Yan 
---
 drivers/net/ethernet/emulex/benet/be.h  | 2 +-
 drivers/net/ethernet/emulex/benet/be_main.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index 674cf9d..2ba4d61 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -255,7 +255,7 @@ struct be_tx_stats {
 /* Structure to hold some data of interest obtained from a TX CQE */
 struct be_tx_compl_info {
u8 status;  /* Completion status */
-   u16 end_index;  /* Completed TXQ Index */
+   u32 end_index;  /* Completed TXQ Index */
 };
 
 struct be_tx_obj {
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 319eee3..3645344 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -2606,7 +2606,7 @@ static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
 }
 
 static u16 be_tx_compl_process(struct be_adapter *adapter,
-  struct be_tx_obj *txo, u16 last_index)
+  struct be_tx_obj *txo, u32 last_index)
 {
struct sk_buff **sent_skbs = txo->sent_skb_list;
struct be_queue_info *txq = &txo->q;
-- 
1.8.3.1

1 2 >

1 - 100 of 141 matches

Mail list logo