[PATCH] geneve: fix tx_errors statistics
The tx_errors counter represents the sum of all errors encountered while transmitting packets, so increment it on every transmit error instead of only when no more specific counter applies. Signed-off-by: Haishuang Yan --- drivers/net/geneve.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index aa61708..72b2f1c 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -922,8 +922,8 @@ tx_error: dev->stats.collisions++; else if (err == -ENETUNREACH) dev->stats.tx_carrier_errors++; - else - dev->stats.tx_errors++; + + dev->stats.tx_errors++; return NETDEV_TX_OK; } @@ -1012,8 +1012,8 @@ tx_error: dev->stats.collisions++; else if (err == -ENETUNREACH) dev->stats.tx_carrier_errors++; - else - dev->stats.tx_errors++; + + dev->stats.tx_errors++; return NETDEV_TX_OK; } #endif -- 1.8.3.1
[PATCH 1/2] ip6_gre: Fix get_size calculation for gre6 tunnel
Do not include attribute IFLA_GRE_TOS. Signed-off-by: Haishuang Yan --- net/ipv6/ip6_gre.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index ee62ec4..3c25fe6 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1394,8 +1394,6 @@ static size_t ip6gre_get_size(const struct net_device *dev) nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GRE_TTL */ nla_total_size(1) + - /* IFLA_GRE_TOS */ - nla_total_size(1) + /* IFLA_GRE_ENCAP_LIMIT */ nla_total_size(1) + /* IFLA_GRE_FLOWINFO */ @@ -1420,7 +1418,6 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) || nla_put_in6_addr(skb, IFLA_GRE_REMOTE, &p->raddr) || nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) || - /*nla_put_u8(skb, IFLA_GRE_TOS, t->priority) ||*/ nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) || nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) || nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags)) -- 1.8.3.1
[PATCH 2/2] gre: Fix wrong tpi->proto in WCCP
When dealing with WCCP in a gre6 tunnel, gre_parse_header() sets the wrong tpi->proto, that is, ETH_P_IP instead of ETH_P_IPV6, for the encapsulated traffic. Signed-off-by: Haishuang Yan --- include/net/gre.h| 2 +- net/ipv4/gre_demux.c | 6 +++--- net/ipv4/ip_gre.c| 4 ++-- net/ipv6/ip6_gre.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/net/gre.h b/include/net/gre.h index a14093c..5dce30a 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -26,7 +26,7 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version); struct net_device *gretap_fb_dev_create(struct net *net, const char *name, u8 name_assign_type); int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, -bool *csum_err); +bool *csum_err, __be16 proto); static inline int gre_calc_hlen(__be16 o_flags) { diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index d78e2ee..4c39f4f 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -62,7 +62,7 @@ EXPORT_SYMBOL_GPL(gre_del_protocol); /* Fills in tpi and returns header length to be pulled. */ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, -bool *csum_err) +bool *csum_err, __be16 proto) { const struct gre_base_hdr *greh; __be32 *options; @@ -109,11 +109,11 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, tpi->seq = 0; } /* WCCP version 1 and 2 protocol decoding. 
-* - Change protocol to IP +* - Change protocol to IPv4/IPv6 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header */ if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { - tpi->proto = htons(ETH_P_IP); + tpi->proto = proto; if ((*(u8 *)options & 0xF0) != 0x40) hdr_len += 4; } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 2b267e7..aaeb478 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -222,7 +222,7 @@ static void gre_err(struct sk_buff *skb, u32 info) struct tnl_ptk_info tpi; bool csum_err = false; - if (gre_parse_header(skb, &tpi, &csum_err) < 0) { + if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP)) < 0) { if (!csum_err) /* ignore csum errors. */ return; } @@ -335,7 +335,7 @@ static int gre_rcv(struct sk_buff *skb) } #endif - hdr_len = gre_parse_header(skb, &tpi, &csum_err); + hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP)); if (hdr_len < 0) goto drop; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 3c25fe6..4541fa5 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -468,7 +468,7 @@ static int gre_rcv(struct sk_buff *skb) bool csum_err = false; int hdr_len; - hdr_len = gre_parse_header(skb, &tpi, &csum_err); + hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6)); if (hdr_len < 0) goto drop; -- 1.8.3.1
[PATCH v2 2/2] ip6_gre: Set flowi6_proto as IPPROTO_GRE in xmit path.
In gre6 xmit path, we are sending a GRE packet, so set fl6 proto to IPPROTO_GRE properly. Signed-off-by: Haishuang Yan --- Changes in v2: - Initialize the flow protocol in ip6gre_tnl_link_config --- net/ipv6/ip6_gre.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 8ea5a4d..e706621 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -712,6 +712,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) fl6->daddr = p->raddr; fl6->flowi6_oif = p->link; fl6->flowlabel = 0; + fl6->flowi6_proto = IPPROTO_GRE; if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS)) fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; -- 1.8.3.1
[PATCH v2 1/2] ip6_gre: Fix MTU setting for ip6gretap
When creat an ip6gretap interface with an unreachable route, the MTU is about 14 bytes larger than what was needed. If the remote address is reachable: ping6 2001:0:130::1 -c 2 PING 2001:0:130::1(2001:0:130::1) 56 data bytes 64 bytes from 2001:0:130::1: icmp_seq=1 ttl=64 time=1.46 ms 64 bytes from 2001:0:130::1: icmp_seq=2 ttl=64 time=81.1 ms --- 2001:0:130::1 ping statistics --- 2 packets transmitted, 2 received, 0% packet loss, time 1001ms rtt min/avg/max/mdev = 1.465/41.316/81.167/39.851 ms ip link add ip6gretap1 type ip6gretap\ local 2001:0:130::2 remote 2001:0:130::1 ip link show ip6gretap1 11: ip6gretap1@NONE: mtu 1434 ... link/ether c2:f3:f8:c1:2c:bf brd ff:ff:ff:ff:ff:ff The MTU value 1434 is right. But if we delete the direct route: ip -6 route del 2001:0:130::/64 ping6 2001:0:130::1 -c 2 connect: Network is unreachable ip link add ip6gretap1 type ip6gretap\ local 2001:0:130::2 remote 2001:0:130::1 ip link show ip6gretap1 12: ip6gretap1@NONE: mtu 1448 ... link/ether 7e:e1:d2:c4:06:5e brd ff:ff:ff:ff:ff:ff Now, the MTU value 1448 is larger than what was needed. The reason is that if there is a reachable route, when run following code in ip6gre_tnl_link_config: if (p->flags & IP6_TNL_F_CAP_XMIT) { int strict = (ipv6_addr_type(&p->raddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); struct rt6_info *rt = rt6_lookup(t->net, &p->raddr, &p->laddr, p->link, strict); if (!rt) return; if (rt->dst.dev) { dev->hard_header_len = rt->dst.dev->hard_header_len + t_hlen; if (set_mtu) { dev->mtu = rt->dst.dev->mtu - t_hlen; if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; if (dev->type == ARPHRD_ETHER) dev->mtu -= ETH_HLEN; if (dev->mtu < IPV6_MIN_MTU) dev->mtu = IPV6_MIN_MTU; } } ip6_rt_put(rt); } Because rt is not NULL here, so dev->mtu will subtract the ethernet header length later. But when rt is NULL, it just simply return, so dev->mtu doesn't update correctly in this situation. 
This patch first verifies that dev->type is ARPHRD_ETHER for the ip6gretap interface, and then decreases the MTU as early as possible. Signed-off-by: Haishuang Yan --- Changes in v2: - Make the commit message clearer. --- net/ipv6/ip6_gre.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4541fa5..8ea5a4d 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1029,6 +1029,8 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) dev->hard_header_len = LL_MAX_HEADER + t_hlen; dev->mtu = ETH_DATA_LEN - t_hlen; + if (dev->type == ARPHRD_ETHER) + dev->mtu -= ETH_HLEN; if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; -- 1.8.3.1
[PATCH] ip_tunnel: enclose a code block in macro IS_ENABLED(CONFIG_IPV6)
For ipv6 case, enclose the code block in macro IS_ENABLED(CONFIG_IPV6). Signed-off-by: Haishuang Yan --- net/ipv4/ip_tunnel.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index a69ed94..5f3c8de 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -665,10 +665,13 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, if (skb->protocol == htons(ETH_P_IP)) { tos = inner_iph->tos; connected = false; - } else if (skb->protocol == htons(ETH_P_IPV6)) { + } +#if IS_ENABLED(CONFIG_IPV6) + else if (skb->protocol == htons(ETH_P_IPV6)) { tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); connected = false; } +#endif } init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, -- 1.8.3.1
[PATCH v2] ip_tunnel: enclose a code block in macro IS_ENABLED(CONFIG_IPV6)
For ipv6 case, enclose the code block in macro IS_ENABLED(CONFIG_IPV6). --- Changes in v2: - Place the "#if IS_ENABLED" block before the "} else if (..) {" piece and the "#endif" before the closing brace and this becomes much easier to look at. Signed-off-by: Haishuang Yan --- net/ipv4/ip_tunnel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index a69ed94..4256349 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -665,9 +665,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, if (skb->protocol == htons(ETH_P_IP)) { tos = inner_iph->tos; connected = false; +#if IS_ENABLED(CONFIG_IPV6) } else if (skb->protocol == htons(ETH_P_IPV6)) { tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); connected = false; +#endif } } -- 1.8.3.1
[PATCH 2/2] ip6_gre: Set flowi6_proto as IPPROTO_GRE in xmit path.
In gre6 xmit path, we are sending a GRE packet, so set fl6 proto to IPPROTO_GRE properly. Signed-off-by: Haishuang Yan --- net/ipv6/ip6_gre.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 8ea5a4d..cc84098 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -541,6 +541,7 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) encap_limit = t->parms.encap_limit; memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_GRE; dsfield = ipv4_get_dsfield(iph); @@ -595,6 +596,7 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) encap_limit = t->parms.encap_limit; memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_GRE; dsfield = ipv6_get_dsfield(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) -- 1.8.3.1
[PATCH 1/2] ip6_gre: Fix MTU setting for ip6gretap
When creating an ip6gretap interface with an unreachable route, the MTU is 14 bytes (ETH_HLEN) larger than needed. If the remote address is reachable: ping6 2001:0:130::1 -c 2 PING 2001:0:130::1(2001:0:130::1) 56 data bytes 64 bytes from 2001:0:130::1: icmp_seq=1 ttl=64 time=1.46 ms 64 bytes from 2001:0:130::1: icmp_seq=2 ttl=64 time=81.1 ms --- 2001:0:130::1 ping statistics --- 2 packets transmitted, 2 received, 0% packet loss, time 1001ms rtt min/avg/max/mdev = 1.465/41.316/81.167/39.851 ms ip link add ip6gretap1 type ip6gretap\ local 2001:0:130::2 remote 2001:0:130::1 ip link show ip6gretap1 11: ip6gretap1@NONE: mtu 1434 ... link/ether c2:f3:f8:c1:2c:bf brd ff:ff:ff:ff:ff:ff The MTU value 1434 is right. But if we delete the direct route: ip -6 route del 2001:0:130::/64 ping6 2001:0:130::1 -c 2 connect: Network is unreachable ip link add ip6gretap1 type ip6gretap\ local 2001:0:130::2 remote 2001:0:130::1 ip link show ip6gretap1 12: ip6gretap1@NONE: mtu 1448 ... link/ether 7e:e1:d2:c4:06:5e brd ff:ff:ff:ff:ff:ff Now, the MTU value 1448 is larger than needed. This patch fixes the issue by subtracting ETH_HLEN for ARPHRD_ETHER devices in ip6gre_tunnel_init_common(). Signed-off-by: Haishuang Yan --- net/ipv6/ip6_gre.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4541fa5..8ea5a4d 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1029,6 +1029,8 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) dev->hard_header_len = LL_MAX_HEADER + t_hlen; dev->mtu = ETH_DATA_LEN - t_hlen; + if (dev->type == ARPHRD_ETHER) + dev->mtu -= ETH_HLEN; if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; -- 1.8.3.1
[PATCH] veth: Fix potential memory leak in veth_newlink
Free peer netdev when failed to configure peer link or register dev. Signed-off-by: Haishuang Yan --- drivers/net/veth.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index f37a6e6..8bb9fb8 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -472,7 +472,6 @@ err_register_dev: /* nothing to do */ err_configure_peer: unregister_netdevice(peer); - return err; err_register_peer: free_netdev(peer); -- 1.8.3.1
[PATCH] netlink: use nla_get_in_addr and nla_put_in_addr for ipv4 address
nla_get_in_addr() and nla_put_in_addr() are available for IPv4 addresses, so use them instead of the generic nla_get_be32()/nla_put_be32() helpers. Signed-off-by: Haishuang Yan --- net/ipv4/ip_tunnel_core.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 02dd990..47ea85d 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -247,10 +247,10 @@ static int ip_tun_build_state(struct net_device *dev, struct nlattr *attr, tun_info->key.tun_id = nla_get_be64(tb[LWTUNNEL_IP_ID]); if (tb[LWTUNNEL_IP_DST]) - tun_info->key.u.ipv4.dst = nla_get_be32(tb[LWTUNNEL_IP_DST]); + tun_info->key.u.ipv4.dst = nla_get_in_addr(tb[LWTUNNEL_IP_DST]); if (tb[LWTUNNEL_IP_SRC]) - tun_info->key.u.ipv4.src = nla_get_be32(tb[LWTUNNEL_IP_SRC]); + tun_info->key.u.ipv4.src = nla_get_in_addr(tb[LWTUNNEL_IP_SRC]); if (tb[LWTUNNEL_IP_TTL]) tun_info->key.ttl = nla_get_u8(tb[LWTUNNEL_IP_TTL]); @@ -275,8 +275,8 @@ static int ip_tun_fill_encap_info(struct sk_buff *skb, struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate); if (nla_put_be64(skb, LWTUNNEL_IP_ID, tun_info->key.tun_id) || - nla_put_be32(skb, LWTUNNEL_IP_DST, tun_info->key.u.ipv4.dst) || - nla_put_be32(skb, LWTUNNEL_IP_SRC, tun_info->key.u.ipv4.src) || + nla_put_in_addr(skb, LWTUNNEL_IP_DST, tun_info->key.u.ipv4.dst) || + nla_put_in_addr(skb, LWTUNNEL_IP_SRC, tun_info->key.u.ipv4.src) || nla_put_u8(skb, LWTUNNEL_IP_TOS, tun_info->key.tos) || nla_put_u8(skb, LWTUNNEL_IP_TTL, tun_info->key.ttl) || nla_put_be16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags)) -- 1.8.3.1
[PATCH] netfilter: unnecessary to check whether ip6_route_output() returns NULL
ip6_route_output() never returns NULL, so it is not appropriate to check if the return value is NULL. Signed-off-by: Haishuang Yan --- net/ipv6/netfilter/nf_reject_ipv6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 4709f65..a540022 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -158,7 +158,7 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) fl6.fl6_dport = otcph->source; security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); dst = ip6_route_output(net, NULL, &fl6); - if (dst == NULL || dst->error) { + if (dst->error) { dst_release(dst); return; } -- 1.8.3.1
[PATCH 2/2] ipv6: l2tp: fix a potential issue in l2tp_ip6_recv
pskb_may_pull() can change skb->data, so we have to load ptr/optr at the right place. Signed-off-by: Haishuang Yan --- net/l2tp/l2tp_ip6.c | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 6b54ff3..cd47990 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -136,12 +136,11 @@ static int l2tp_ip6_recv(struct sk_buff *skb) struct l2tp_tunnel *tunnel = NULL; int length; - /* Point to L2TP header */ - optr = ptr = skb->data; - if (!pskb_may_pull(skb, 4)) goto discard; + /* Point to L2TP header */ + optr = ptr = skb->data; session_id = ntohl(*((__be32 *) ptr)); ptr += 4; @@ -169,6 +168,9 @@ static int l2tp_ip6_recv(struct sk_buff *skb) if (!pskb_may_pull(skb, length)) goto discard; + /* Point to L2TP header */ + optr = ptr = skb->data; + ptr += 4; pr_debug("%s: ip recv\n", tunnel->name); print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); } -- 1.8.3.1
[PATCH 1/2] ipv4: l2tp: fix a potential issue in l2tp_ip_recv
pskb_may_pull() can change skb->data, so we have to load ptr/optr at the right place. Signed-off-by: Haishuang Yan --- net/l2tp/l2tp_ip.c | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index ec22078..42de4cc 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -123,12 +123,11 @@ static int l2tp_ip_recv(struct sk_buff *skb) struct l2tp_tunnel *tunnel = NULL; int length; - /* Point to L2TP header */ - optr = ptr = skb->data; - if (!pskb_may_pull(skb, 4)) goto discard; + /* Point to L2TP header */ + optr = ptr = skb->data; session_id = ntohl(*((__be32 *) ptr)); ptr += 4; @@ -156,6 +155,9 @@ static int l2tp_ip_recv(struct sk_buff *skb) if (!pskb_may_pull(skb, length)) goto discard; + /* Point to L2TP header */ + optr = ptr = skb->data; + ptr += 4; pr_debug("%s: ip recv\n", tunnel->name); print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); } -- 1.8.3.1
[PATCH v3 1/3] selftests: netfilter: add ipvs test script
Test virutal server via directing routing for IPv4. Tested: # selftests: netfilter: ipvs.sh # Testing DR mode... # ipvs.sh: PASS ok 6 selftests: netfilter: ipvs.sh Signed-off-by: Haishuang Yan --- v3: use bash style v2: optimize test script --- tools/testing/selftests/netfilter/Makefile | 2 +- tools/testing/selftests/netfilter/ipvs.sh | 184 + 2 files changed, 185 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/netfilter/ipvs.sh diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 4144984..de1032b 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -2,6 +2,6 @@ # Makefile for netfilter selftests TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ - conntrack_icmp_related.sh nft_flowtable.sh + conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh include ../lib.mk diff --git a/tools/testing/selftests/netfilter/ipvs.sh b/tools/testing/selftests/netfilter/ipvs.sh new file mode 100755 index 000..6201046 --- /dev/null +++ b/tools/testing/selftests/netfilter/ipvs.sh @@ -0,0 +1,184 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# End-to-end ipvs test suite +# Topology: +#--+ +# | | +# ns0 | ns1 | +# --- | ------| +# | veth01 | - | veth10 || veth12 || +# ---peer ------| +# | || | +# --- || | +# | br0| |- peer |--| +# --- || | +# | || | +# -- peer -- --- | +# | veth02 | - | veth20 | | veth21 | | +# -- | -- --- | +# | ns2 | +# | | +#--+ +# +# We assume that all network driver are loaded +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 +GREEN='\033[0;92m' +RED='\033[0;31m' +NC='\033[0m' # No Color + +readonly port=8080 + +readonly vip_v4=207.175.44.110 +readonly cip_v4=10.0.0.2 +readonly gip_v4=10.0.0.1 +readonly dip_v4=172.16.0.1 +readonly rip_v4=172.16.0.2 +readonly sip_v4=10.0.0.3 + +readonly infile="$(mktemp)" +readonly outfile="$(mktemp)" + +sysipvsnet=/proc/sys/net/ipv4/vs/ +if [ ! 
-d /proc/sys/net/ipv4/vs/ ]; then +modprobe -q ip_vs +if [ $? -ne 0 ]; then +echo "SKIP: Could not run test without ipvs module" + exit $ksft_skip +fi +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ipvsadm -v > /dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "SKIP: Could not run test without ipvsadm" + exit $ksft_skip +fi + +nc --version > /dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "SKIP: Could not run test without ncat" + exit $ksft_skip +fi + +setup() { +ip netns add ns0 +ip netns add ns1 +ip netns add ns2 + +ip link add veth01 netns ns0 type veth peer name veth10 netns ns1 +ip link add veth02 netns ns0 type veth peer name veth20 netns ns2 +ip link add veth12 netns ns1 type veth peer name veth21 netns ns2 + +ip netns exec ns0 ip link set veth01 up +ip netns exec ns0 ip link set veth02 up +ip netns exec ns0 ip link add br0 type bridge +ip netns exec ns0 ip link set veth01 master br0 +ip netns exec ns0 ip link set veth02 master br0 +ip netns exec ns0 ip link set br0 up +ip netns exec ns0 ip addr add ${cip_v4}/24 dev br0 + +ip netns exec ns1 ip link set lo up +ip netns exec ns1 ip link set veth10 up +ip netns exec ns1 ip addr add ${gip_v4}/24 dev veth10 +ip netns exec ns1 ip link set veth12 up +ip netns exec ns1 ip addr add ${dip_v4}/24 dev veth12 + +ip netns exec ns2 ip link set lo up +ip netns exec ns2 ip link set veth21 up +ip netns exec ns2 ip addr add ${rip_v4}/24 dev veth21 +ip netns exec ns2 ip link set veth20 up +ip netns exec ns2 ip addr add ${sip_v4}/24 dev veth20 +} + +cleanup() { +for i in 0 1 2 +do + ip netns del ns$i > /dev/null 2>&1 +done +pkill nc +} + +server_listen() { + ip netns exec ns2 nc -l -p 8080 > "${outfile}" & + ser
[PATCH v3 2/3] selftests: netfilter: add ipvs nat test case
Test virtual server via NAT. Tested: # selftests: netfilter: ipvs.sh # Testing DR mode... # Testing NAT mode... # ipvs.sh: PASS Signed-off-by: Haishuang Yan --- tools/testing/selftests/netfilter/ipvs.sh | 22 +- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/netfilter/ipvs.sh b/tools/testing/selftests/netfilter/ipvs.sh index 6201046..270b5da 100755 --- a/tools/testing/selftests/netfilter/ipvs.sh +++ b/tools/testing/selftests/netfilter/ipvs.sh @@ -160,20 +160,40 @@ test_dr() { test_service } +test_nat() { +ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0 + +ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1 +ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr +ip netns exec ns1 ipvsadm -a -m -t ${vip_v4}:${port} -r ${rip_v4}:${port} +ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1 + +ip netns exec ns2 ip link del veth20 +ip netns exec ns2 ip route add default via ${dip_v4} dev veth21 + +test_service +} + run_tests() { local errors= echo "Testing DR mode..." + cleanup setup test_dr errors=$(( $errors + $? )) + echo "Testing NAT mode..." + cleanup + setup + test_nat + errors=$(( $errors + $? )) + return $errors } trap cleanup EXIT -cleanup run_tests if [ $? -ne 0 ]; then -- 1.8.3.1
[PATCH v3 0/3] selftests: netfilter: introduce test cases for ipvs
This patch series includes test cases for ipvs. The test topology is shown below: +--+ | | | | ns0 | ns1 | | --- | ------| | | veth01 | - | veth10 || veth12 || | ---peer ------| | | || | | --- || | | | br0| |- peer |--| | --- || | | | || | | -- peer -- --- | | | veth02 | - | veth20 | | veth21 | | | -- | -- --- | | | ns2 | | | | +--+ Test results: # selftests: netfilter: ipvs.sh # Testing DR mode... # Testing NAT mode... # Testing Tunnel mode... # ipvs.sh: PASS ok 6 selftests: netfilter: ipvs.sh Haishuang Yan (3): selftests: netfilter: add ipvs test script selftests: netfilter: add ipvs nat test case selftests: netfilter: add ipvs tunnel test case tools/testing/selftests/netfilter/Makefile | 2 +- tools/testing/selftests/netfilter/ipvs.sh | 234 + 2 files changed, 235 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/netfilter/ipvs.sh -- 1.8.3.1
[PATCH v3 3/3] selftests: netfilter: add ipvs tunnel test case
Test virtual server via ipip tunnel. Tested: # selftests: netfilter: ipvs.sh # Testing DR mode... # Testing NAT mode... # Testing Tunnel mode... # ipvs.sh: PASS ok 6 selftests: netfilter: ipvs.sh Signed-off-by: Haishuang Yan --- v2: optimize test script --- tools/testing/selftests/netfilter/ipvs.sh | 30 ++ 1 file changed, 30 insertions(+) diff --git a/tools/testing/selftests/netfilter/ipvs.sh b/tools/testing/selftests/netfilter/ipvs.sh index 270b5da..a8f6e70 100755 --- a/tools/testing/selftests/netfilter/ipvs.sh +++ b/tools/testing/selftests/netfilter/ipvs.sh @@ -174,6 +174,30 @@ test_nat() { test_service } +test_tun() { +ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0 + +ip netns exec ns1 modprobe ipip +ip netns exec ns1 ip link set tunl0 up +ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=0 +ip netns exec ns1 sysctl -qw net.ipv4.conf.all.send_redirects=0 +ip netns exec ns1 sysctl -qw net.ipv4.conf.default.send_redirects=0 +ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr +ip netns exec ns1 ipvsadm -a -i -t ${vip_v4}:${port} -r ${rip_v4}:${port} +ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1 + +ip netns exec ns2 modprobe ipip +ip netns exec ns2 ip link set tunl0 up +ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_ignore=1 +ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_announce=2 +ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0 +ip netns exec ns2 sysctl -qw net.ipv4.conf.tunl0.rp_filter=0 +ip netns exec ns2 sysctl -qw net.ipv4.conf.veth21.rp_filter=0 +ip netns exec ns2 ip addr add ${vip_v4}/32 dev lo:1 + +test_service +} + run_tests() { local errors= @@ -189,6 +213,12 @@ run_tests() { test_nat errors=$(( $errors + $? )) + echo "Testing Tunnel mode..." + cleanup + setup + test_tun + errors=$(( $errors + $? )) + return $errors } -- 1.8.3.1
[PATCH] ip6erspan: remove the incorrect mtu limit for ip6erspan
The ip6erspan driver calls ether_setup(). Since commit 61e84623ace3 ("net: centralize net_device min/max MTU checking"), the allowed MTU range is [min_mtu, max_mtu], which is [68, 1500] by default. As a result, the MTU of the erspan device cannot be set above 1500; this limit is not correct for the ip6erspan tap device. Signed-off-by: Haishuang Yan --- net/ipv6/ip6_gre.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index d5779d6..787d9f2 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -2192,6 +2192,7 @@ static void ip6erspan_tap_setup(struct net_device *dev) { ether_setup(dev); + dev->max_mtu = 0; dev->netdev_ops = &ip6erspan_netdev_ops; dev->needs_free_netdev = true; dev->priv_destructor = ip6gre_dev_free; -- 1.8.3.1
[PATCH v5 3/3] selftests: netfilter: add ipvs tunnel test case
Test virtual server via ipip tunnel. Tested: # selftests: netfilter: ipvs.sh # Testing DR mode... # Testing NAT mode... # Testing Tunnel mode... # ipvs.sh: PASS ok 6 selftests: netfilter: ipvs.sh Signed-off-by: Haishuang Yan --- v2: optimize test script --- tools/testing/selftests/netfilter/ipvs.sh | 30 ++ 1 file changed, 30 insertions(+) diff --git a/tools/testing/selftests/netfilter/ipvs.sh b/tools/testing/selftests/netfilter/ipvs.sh index 60250f7..edea729 100755 --- a/tools/testing/selftests/netfilter/ipvs.sh +++ b/tools/testing/selftests/netfilter/ipvs.sh @@ -168,6 +168,30 @@ test_nat() { test_service } +test_tun() { + ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0 + + ip netns exec ns1 modprobe ipip + ip netns exec ns1 ip link set tunl0 up + ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=0 + ip netns exec ns1 sysctl -qw net.ipv4.conf.all.send_redirects=0 + ip netns exec ns1 sysctl -qw net.ipv4.conf.default.send_redirects=0 + ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr + ip netns exec ns1 ipvsadm -a -i -t ${vip_v4}:${port} -r ${rip_v4}:${port} + ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1 + + ip netns exec ns2 modprobe ipip + ip netns exec ns2 ip link set tunl0 up + ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_ignore=1 + ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_announce=2 + ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0 + ip netns exec ns2 sysctl -qw net.ipv4.conf.tunl0.rp_filter=0 + ip netns exec ns2 sysctl -qw net.ipv4.conf.veth21.rp_filter=0 + ip netns exec ns2 ip addr add ${vip_v4}/32 dev lo:1 + + test_service +} + run_tests() { local errors= @@ -183,6 +207,12 @@ run_tests() { test_nat errors=$(( $errors + $? )) + echo "Testing Tunnel mode..." + cleanup + setup + test_tun + errors=$(( $errors + $? )) + return $errors } -- 1.8.3.1
[PATCH v5 2/3] selftests: netfilter: add ipvs nat test case
Test virtual server via NAT. Tested: # selftests: netfilter: ipvs.sh # Testing DR mode... # Testing NAT mode... # ipvs.sh: PASS Signed-off-by: Haishuang Yan --- tools/testing/selftests/netfilter/ipvs.sh | 22 +- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/netfilter/ipvs.sh b/tools/testing/selftests/netfilter/ipvs.sh index f844c0a..60250f7 100755 --- a/tools/testing/selftests/netfilter/ipvs.sh +++ b/tools/testing/selftests/netfilter/ipvs.sh @@ -154,20 +154,40 @@ test_dr() { test_service } +test_nat() { + ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0 + + ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1 + ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr + ip netns exec ns1 ipvsadm -a -m -t ${vip_v4}:${port} -r ${rip_v4}:${port} + ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1 + + ip netns exec ns2 ip link del veth20 + ip netns exec ns2 ip route add default via ${dip_v4} dev veth21 + + test_service +} + run_tests() { local errors= echo "Testing DR mode..." + cleanup setup test_dr errors=$(( $errors + $? )) + echo "Testing NAT mode..." + cleanup + setup + test_nat + errors=$(( $errors + $? )) + return $errors } trap cleanup EXIT -cleanup run_tests if [ $? -ne 0 ]; then -- 1.8.3.1
[PATCH v5 1/3] selftests: netfilter: add ipvs test script
Test virutal server via directing routing for IPv4. Tested: # selftests: netfilter: ipvs.sh # Testing DR mode... # ipvs.sh: PASS ok 6 selftests: netfilter: ipvs.sh Signed-off-by: Haishuang Yan --- v5: use cmp to compare two file contents suggested by Simon Horman v4: use #!/bin/bash -p suggested by Duncan Roe v3: use bash style v2: optimize test script --- tools/testing/selftests/netfilter/Makefile | 2 +- tools/testing/selftests/netfilter/ipvs.sh | 178 + 2 files changed, 179 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/netfilter/ipvs.sh diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 4144984..de1032b 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -2,6 +2,6 @@ # Makefile for netfilter selftests TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ - conntrack_icmp_related.sh nft_flowtable.sh + conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh include ../lib.mk diff --git a/tools/testing/selftests/netfilter/ipvs.sh b/tools/testing/selftests/netfilter/ipvs.sh new file mode 100755 index 000..f844c0a --- /dev/null +++ b/tools/testing/selftests/netfilter/ipvs.sh @@ -0,0 +1,178 @@ +#!/bin/bash -p +# SPDX-License-Identifier: GPL-2.0 +# +# End-to-end ipvs test suite +# Topology: +#--+ +# | | +# ns0 | ns1 | +# --- | ------| +# | veth01 | - | veth10 || veth12 || +# ---peer ------| +# | || | +# --- || | +# | br0| |- peer |--| +# --- || | +# | || | +# -- peer -- --- | +# | veth02 | - | veth20 | | veth21 | | +# -- | -- --- | +# | ns2 | +# | | +#--+ +# +# We assume that all network driver are loaded +# + +# Kselftest framework requirement - SKIP code is 4. 
+ksft_skip=4 +ret=0 +GREEN='\033[0;92m' +RED='\033[0;31m' +NC='\033[0m' # No Color + +readonly port=8080 + +readonly vip_v4=207.175.44.110 +readonly cip_v4=10.0.0.2 +readonly gip_v4=10.0.0.1 +readonly dip_v4=172.16.0.1 +readonly rip_v4=172.16.0.2 +readonly sip_v4=10.0.0.3 + +readonly infile="$(mktemp)" +readonly outfile="$(mktemp)" +readonly datalen=32 + +sysipvsnet="/proc/sys/net/ipv4/vs/" +if [ ! -d $sysipvsnet ]; then + modprobe -q ip_vs + if [ $? -ne 0 ]; then + echo "skip: could not run test without ipvs module" + exit $ksft_skip + fi +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ipvsadm -v > /dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "SKIP: Could not run test without ipvsadm" + exit $ksft_skip +fi + +setup() { + ip netns add ns0 + ip netns add ns1 + ip netns add ns2 + + ip link add veth01 netns ns0 type veth peer name veth10 netns ns1 + ip link add veth02 netns ns0 type veth peer name veth20 netns ns2 + ip link add veth12 netns ns1 type veth peer name veth21 netns ns2 + + ip netns exec ns0 ip link set veth01 up + ip netns exec ns0 ip link set veth02 up + ip netns exec ns0 ip link add br0 type bridge + ip netns exec ns0 ip link set veth01 master br0 + ip netns exec ns0 ip link set veth02 master br0 + ip netns exec ns0 ip link set br0 up + ip netns exec ns0 ip addr add ${cip_v4}/24 dev br0 + + ip netns exec ns1 ip link set lo up + ip netns exec ns1 ip link set veth10 up + ip netns exec ns1 ip addr add ${gip_v4}/24 dev veth10 + ip netns exec ns1 ip link set veth12 up + ip netns exec ns1 ip addr add ${dip_v4}/24 dev veth12 + + ip netns exec ns2 ip link set lo up + ip netns exec ns2 ip link set veth21 up + ip netns exec ns2 ip addr add ${rip_v4}/24 dev veth21 + ip netns exec ns2 ip link set veth20 up + ip netns exec ns2 ip addr add ${sip_v4}/24 dev veth20 + + sleep 1 + + dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none +} + +cleanup() { +
[PATCH v5 0/3] selftests: netfilter: introduce test cases for ipvs
This patch series includes test cases for ipvs. The test topology is shown below: +--+ | | | | ns0 | ns1 | | --- | ------| | | veth01 | - | veth10 || veth12 || | ---peer ------| | | || | | --- || | | | br0| |- peer |--| | --- || | | | || | | -- peer -- --- | | | veth02 | - | veth20 | | veth21 | | | -- | -- --- | | | ns2 | | | | +--+ Test results: # selftests: netfilter: ipvs.sh # Testing DR mode... # Testing NAT mode... # Testing Tunnel mode... # ipvs.sh: PASS ok 6 selftests: netfilter: ipvs.sh Signed-off-by: Haishuang Yan Haishuang Yan (3): selftests: netfilter: add ipvs test script selftests: netfilter: add ipvs nat test case selftests: netfilter: add ipvs tunnel test case tools/testing/selftests/netfilter/Makefile | 2 +- tools/testing/selftests/netfilter/ipvs.sh | 228 + 2 files changed, 229 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/netfilter/ipvs.sh -- 1.8.3.1
Re: [PATCH v3 0/3] selftests: netfilter: introduce test cases for ipvs
> On 2019年10月4日, at 下午7:47, Duncan Roe wrote: > > On Thu, Oct 03, 2019 at 10:41:06PM +0800, Haishuang Yan wrote: >> >> >>> On 2019??10??2??, at 9:27, Duncan Roe >>> wrote: >>> >>> On Tue, Oct 01, 2019 at 09:34:13PM +0300, Julian Anastasov wrote: >>>> >>>>Hello, >>>> >>>> On Tue, 1 Oct 2019, Haishuang Yan wrote: >>>> >>>>> This series patch include test cases for ipvs. >>>>> >>>>> The test topology is who as below: >>>>> +--+ >>>>> | | | >>>>> | ns0 | ns1 | >>>>> | --- | ------| >>>>> | | veth01 | - | veth10 || veth12 || >>>>> | ---peer ------| >>>>> | | || | >>>>> | --- || | >>>>> | | br0| |- peer |--| >>>>> | --- || | >>>>> | | || | >>>>> | -- peer -- --- | >>>>> | | veth02 | - | veth20 | | veth12 | | >>>>> | -- | -- --- | >>>>> | | ns2 | >>>>> | | | >>>>> +--+ >>>>> >>>>> Test results: >>>>> # selftests: netfilter: ipvs.sh >>>>> # Testing DR mode... >>>>> # Testing NAT mode... >>>>> # Testing Tunnel mode... >>>>> # ipvs.sh: PASS >>>>> ok 6 selftests: netfilter: ipvs.sh >>>>> >>>>> Haishuang Yan (3): >>>>> selftests: netfilter: add ipvs test script >>>>> selftests: netfilter: add ipvs nat test case >>>>> selftests: netfilter: add ipvs tunnel test case >>>> >>>> Acked-by: Julian Anastasov >>>> >>>>> tools/testing/selftests/netfilter/Makefile | 2 +- >>>>> tools/testing/selftests/netfilter/ipvs.sh | 234 >>>>> + >>>>> 2 files changed, 235 insertions(+), 1 deletion(-) >>>>> create mode 100755 tools/testing/selftests/netfilter/ipvs.sh >>>> >>>> Regards >>>> >>>> -- >>>> Julian Anastasov >>> >>> I still prefer #!/bin/sh in 1/3. You never know what's in someone's >>> environment >>> >>> Cheers ... Duncan. >>> >> >> It??s also my preference too. "_" >> >> I have tested both #!/bin/bash and #!/bin/sh script, they all works properly. > > Enter these 2 lines: >> ip(){ return 0; } >> export -f ip > > Now try the #!/bin/bash script. If that now fails, try again with #!/bin/bash > changed to #!/bin/bash -p > > Any better now? > > Cheers ... Duncan. 
> It’s better now, thanks for your explanation. In v3 commit I will use #!/bin/bash -p to prevent exporting function from environment variables.
[PATCH v4 0/3] selftests: netfilter: introduce test cases for ipvs
This patch series includes test cases for ipvs. The test topology is shown below: +--+ | | | | ns0 | ns1 | | --- | ------| | | veth01 | - | veth10 || veth12 || | ---peer ------| | | || | | --- || | | | br0| |- peer |--| | --- || | | | || | | -- peer -- --- | | | veth02 | - | veth20 | | veth21 | | | -- | -- --- | | | ns2 | | | | +--+ Test results: # selftests: netfilter: ipvs.sh # Testing DR mode... # Testing NAT mode... # Testing Tunnel mode... # ipvs.sh: PASS ok 6 selftests: netfilter: ipvs.sh Haishuang Yan (3): selftests: netfilter: add ipvs test script selftests: netfilter: add ipvs nat test case selftests: netfilter: add ipvs tunnel test case tools/testing/selftests/netfilter/Makefile | 2 +- tools/testing/selftests/netfilter/ipvs.sh | 234 + 2 files changed, 235 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/netfilter/ipvs.sh -- 1.8.3.1
[PATCH v4 1/3] selftests: netfilter: add ipvs test script
Test virtual server via direct routing for IPv4. Tested: # selftests: netfilter: ipvs.sh # Testing DR mode... # ipvs.sh: PASS ok 6 selftests: netfilter: ipvs.sh Signed-off-by: Haishuang Yan --- v4: use #!/bin/bash -p suggested by Duncan Roe v3: use bash style v2: optimize test script --- tools/testing/selftests/netfilter/Makefile | 2 +- tools/testing/selftests/netfilter/ipvs.sh | 184 + 2 files changed, 185 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/netfilter/ipvs.sh diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 4144984..de1032b 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -2,6 +2,6 @@ # Makefile for netfilter selftests TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ - conntrack_icmp_related.sh nft_flowtable.sh + conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh include ../lib.mk diff --git a/tools/testing/selftests/netfilter/ipvs.sh b/tools/testing/selftests/netfilter/ipvs.sh new file mode 100755 index 000..f6da1bd --- /dev/null +++ b/tools/testing/selftests/netfilter/ipvs.sh @@ -0,0 +1,184 @@ +#!/bin/bash -p +# SPDX-License-Identifier: GPL-2.0 +# +# End-to-end ipvs test suite +# Topology: +#--+ +# | | +# ns0 | ns1 | +# --- | ------| +# | veth01 | - | veth10 || veth12 || +# ---peer ------| +# | || | +# --- || | +# | br0| |- peer |--| +# --- || | +# | || | +# -- peer -- --- | +# | veth02 | - | veth20 | | veth21 | | +# -- | -- --- | +# | ns2 | +# | | +#--+ +# +# We assume that all network driver are loaded +# + +# Kselftest framework requirement - SKIP code is 4. 
+ksft_skip=4 +ret=0 +GREEN='\033[0;92m' +RED='\033[0;31m' +NC='\033[0m' # No Color + +readonly port=8080 + +readonly vip_v4=207.175.44.110 +readonly cip_v4=10.0.0.2 +readonly gip_v4=10.0.0.1 +readonly dip_v4=172.16.0.1 +readonly rip_v4=172.16.0.2 +readonly sip_v4=10.0.0.3 + +readonly infile="$(mktemp)" +readonly outfile="$(mktemp)" + +sysipvsnet=/proc/sys/net/ipv4/vs/ +if [ ! -d /proc/sys/net/ipv4/vs/ ]; then +modprobe -q ip_vs +if [ $? -ne 0 ]; then +echo "SKIP: Could not run test without ipvs module" + exit $ksft_skip +fi +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ipvsadm -v > /dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "SKIP: Could not run test without ipvsadm" + exit $ksft_skip +fi + +nc --version > /dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "SKIP: Could not run test without ncat" + exit $ksft_skip +fi + +setup() { +ip netns add ns0 +ip netns add ns1 +ip netns add ns2 + +ip link add veth01 netns ns0 type veth peer name veth10 netns ns1 +ip link add veth02 netns ns0 type veth peer name veth20 netns ns2 +ip link add veth12 netns ns1 type veth peer name veth21 netns ns2 + +ip netns exec ns0 ip link set veth01 up +ip netns exec ns0 ip link set veth02 up +ip netns exec ns0 ip link add br0 type bridge +ip netns exec ns0 ip link set veth01 master br0 +ip netns exec ns0 ip link set veth02 master br0 +ip netns exec ns0 ip link set br0 up +ip netns exec ns0 ip addr add ${cip_v4}/24 dev br0 + +ip netns exec ns1 ip link set lo up +ip netns exec ns1 ip link set veth10 up +ip netns exec ns1 ip addr add ${gip_v4}/24 dev veth10 +ip netns exec ns1 ip link set veth12 up +ip netns exec ns1 ip addr add ${dip_v4}/24 dev veth12 + +ip netns exec ns2 ip link set lo up +ip netns exec ns2 ip link set veth21 up +ip netns exec ns2 ip addr add ${rip_v4}/24 dev veth21 +ip netns exec ns2 ip link set veth20 up +ip netns exec ns2 ip addr add ${sip_v4}/24 dev veth20 +} + +cleanup() { +for i in 0 1 2 
+do + ip netns del ns$i > /dev/null 2>&1 +done +pkill nc +} + +server_listen() { + ip netns exec ns2 nc -l -p
[PATCH v4 3/3] selftests: netfilter: add ipvs tunnel test case
Test virtual server via ipip tunnel. Tested: # selftests: netfilter: ipvs.sh # Testing DR mode... # Testing NAT mode... # Testing Tunnel mode... # ipvs.sh: PASS ok 6 selftests: netfilter: ipvs.sh Signed-off-by: Haishuang Yan --- v2: optimize test script --- tools/testing/selftests/netfilter/ipvs.sh | 30 ++ 1 file changed, 30 insertions(+) diff --git a/tools/testing/selftests/netfilter/ipvs.sh b/tools/testing/selftests/netfilter/ipvs.sh index 2601a7c..48647ae 100755 --- a/tools/testing/selftests/netfilter/ipvs.sh +++ b/tools/testing/selftests/netfilter/ipvs.sh @@ -174,6 +174,30 @@ test_nat() { test_service } +test_tun() { +ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0 + +ip netns exec ns1 modprobe ipip +ip netns exec ns1 ip link set tunl0 up +ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=0 +ip netns exec ns1 sysctl -qw net.ipv4.conf.all.send_redirects=0 +ip netns exec ns1 sysctl -qw net.ipv4.conf.default.send_redirects=0 +ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr +ip netns exec ns1 ipvsadm -a -i -t ${vip_v4}:${port} -r ${rip_v4}:${port} +ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1 + +ip netns exec ns2 modprobe ipip +ip netns exec ns2 ip link set tunl0 up +ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_ignore=1 +ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_announce=2 +ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0 +ip netns exec ns2 sysctl -qw net.ipv4.conf.tunl0.rp_filter=0 +ip netns exec ns2 sysctl -qw net.ipv4.conf.veth21.rp_filter=0 +ip netns exec ns2 ip addr add ${vip_v4}/32 dev lo:1 + +test_service +} + run_tests() { local errors= @@ -189,6 +213,12 @@ run_tests() { test_nat errors=$(( $errors + $? )) + echo "Testing Tunnel mode..." + cleanup + setup + test_tun + errors=$(( $errors + $? )) + return $errors } -- 1.8.3.1
[PATCH v4 2/3] selftests: netfilter: add ipvs nat test case
Test virtual server via NAT. Tested: # selftests: netfilter: ipvs.sh # Testing DR mode... # Testing NAT mode... # ipvs.sh: PASS Signed-off-by: Haishuang Yan --- tools/testing/selftests/netfilter/ipvs.sh | 22 +- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/netfilter/ipvs.sh b/tools/testing/selftests/netfilter/ipvs.sh index f6da1bd..2601a7c 100755 --- a/tools/testing/selftests/netfilter/ipvs.sh +++ b/tools/testing/selftests/netfilter/ipvs.sh @@ -160,20 +160,40 @@ test_dr() { test_service } +test_nat() { +ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0 + +ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1 +ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr +ip netns exec ns1 ipvsadm -a -m -t ${vip_v4}:${port} -r ${rip_v4}:${port} +ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1 + +ip netns exec ns2 ip link del veth20 +ip netns exec ns2 ip route add default via ${dip_v4} dev veth21 + +test_service +} + run_tests() { local errors= echo "Testing DR mode..." + cleanup setup test_dr errors=$(( $errors + $? )) + echo "Testing NAT mode..." + cleanup + setup + test_nat + errors=$(( $errors + $? )) + return $errors } trap cleanup EXIT -cleanup run_tests if [ $? -ne 0 ]; then -- 1.8.3.1
[PATCH 0/3] selftests: netfilter: introduce test cases for ipvs
This patch series includes test cases for ipvs. The test topology is shown below: +--+ | | | | ns0 | ns1 | | --- | ------| | | veth01 | - | veth10 || veth12 || | ---peer ------| | | || | | --- || | | | br0| |- peer |--| | --- || | | | || | | -- peer -- --- | | | veth02 | - | veth20 | | veth21 | | | -- | -- --- | | | ns2 | | | | +--+ Test results: # selftests: netfilter: ipvs.sh # Testing DR mode... # Testing NAT mode... # Testing Tunnel mode... # ipvs.sh: PASS ok 6 selftests: netfilter: ipvs.sh Haishuang Yan (3): selftests: netfilter: add ipvs test script selftests: netfilter: add ipvs nat test case selftests: netfilter: add ipvs tunnel test case tools/testing/selftests/netfilter/Makefile | 2 +- tools/testing/selftests/netfilter/ipvs.sh | 230 + 2 files changed, 231 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/netfilter/ipvs.sh -- 1.8.3.1
[PATCH 1/3] selftests: netfilter: add ipvs test script
Test virtual server via direct routing for IPv4. Tested: # selftests: netfilter: ipvs.sh # Testing DR mode... # ipvs.sh: PASS ok 6 selftests: netfilter: ipvs.sh Signed-off-by: Haishuang Yan --- tools/testing/selftests/netfilter/Makefile | 2 +- tools/testing/selftests/netfilter/ipvs.sh | 177 + 2 files changed, 178 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/netfilter/ipvs.sh diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 4144984..de1032b 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -2,6 +2,6 @@ # Makefile for netfilter selftests TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ - conntrack_icmp_related.sh nft_flowtable.sh + conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh include ../lib.mk diff --git a/tools/testing/selftests/netfilter/ipvs.sh b/tools/testing/selftests/netfilter/ipvs.sh new file mode 100755 index 000..15c386b --- /dev/null +++ b/tools/testing/selftests/netfilter/ipvs.sh @@ -0,0 +1,177 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# End-to-end ipvs test suite +# Topology: +#--- +# | | +# ns0 | ns1 | +# --- | ------| +# | veth01 | - | veth10 || veth12 || +# ---peer ------| +# | || | +# --- || | +# | br0| |- peer |--| +# --- || | +# | || | +# -- peer -- --- | +# | veth02 | - | veth20 | | veth12 | | +# -- | -- --- | +# | ns2 | +# | | +#--- +# +# We assume that all network driver are loaded +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 +GREEN='\033[0;92m' +RED='\033[0;31m' +NC='\033[0m' # No Color + +readonly port=8080 + +readonly vip_v4=207.175.44.110 +readonly cip_v4=10.0.0.2 +readonly gip_v4=10.0.0.1 +readonly dip_v4=172.16.0.1 +readonly rip_v4=172.16.0.2 +readonly sip_v4=10.0.0.3 + +readonly infile="$(mktemp)" +readonly outfile="$(mktemp)" + +ip -Version > /dev/null 2>&1 +if [ $? 
-ne 0 ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ipvsadm -v > /dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "SKIP: Could not run test without ipvsadm" + exit $ksft_skip +fi + +nc --version > /dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "SKIP: Could not run test without ncat" + exit $ksft_skip +fi + +setup() { +ip netns add ns0 +ip netns add ns1 +ip netns add ns2 + +ip link add veth01 netns ns0 type veth peer name veth10 netns ns1 +ip link add veth02 netns ns0 type veth peer name veth20 netns ns2 +ip link add veth12 netns ns1 type veth peer name veth21 netns ns2 + +ip netns exec ns0 ip link set veth01 up +ip netns exec ns0 ip link set veth02 up +ip netns exec ns0 ip link add br0 type bridge +ip netns exec ns0 ip link set veth01 master br0 +ip netns exec ns0 ip link set veth02 master br0 +ip netns exec ns0 ip link set br0 up +ip netns exec ns0 ip addr add ${cip_v4}/24 dev br0 + +ip netns exec ns1 ip link set lo up +ip netns exec ns1 ip link set veth10 up +ip netns exec ns1 ip addr add ${gip_v4}/24 dev veth10 +ip netns exec ns1 ip link set veth12 up +ip netns exec ns1 ip addr add ${dip_v4}/24 dev veth12 + +ip netns exec ns2 ip link set lo up +ip netns exec ns2 ip link set veth21 up +ip netns exec ns2 ip addr add ${rip_v4}/24 dev veth21 +ip netns exec ns2 ip link set veth20 up +ip netns exec ns2 ip addr add ${sip_v4}/24 dev veth20 +} + +cleanup() { +for i in 0 1 2 +do + ip netns del ns$i > /dev/null 2>&1 +done +pkill nc +} + +server_listen() { + ip netns exec ns2 nc -l -p 8080 > "${outfile}" & + server_pid=$! + sleep 0.2 +} + +client_connect() { + ip netns exec ns0 timeout 2 nc -w 1 ${vip_v4} ${port} < "${infile}" +} + +verify_data() { + wait "${server_pid}" + # sha1sum returns two fields [sha1] [filepath] + # convert to bas
[PATCH 3/3] selftests: netfilter: add ipvs tunnel test case
Test virtual server via ipip tunnel. Tested: # selftests: netfilter: ipvs.sh # Testing DR mode... # Testing NAT mode... # Testing Tunnel mode... # ipvs.sh: PASS ok 6 selftests: netfilter: ipvs.sh Signed-off-by: Haishuang Yan --- tools/testing/selftests/netfilter/ipvs.sh | 33 +++ 1 file changed, 33 insertions(+) diff --git a/tools/testing/selftests/netfilter/ipvs.sh b/tools/testing/selftests/netfilter/ipvs.sh index 40058f9..2012cec 100755 --- a/tools/testing/selftests/netfilter/ipvs.sh +++ b/tools/testing/selftests/netfilter/ipvs.sh @@ -167,6 +167,33 @@ test_nat() { test_service } +test_tun() { +ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0 + +ip netns exec ns1 modprobe ipip +ip netns exec ns1 ip link set tunl0 up +ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=0 +ip netns exec ns1 sysctl -qw net.ipv4.conf.all.send_redirects=0 +ip netns exec ns1 sysctl -qw net.ipv4.conf.default.send_redirects=0 +ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr +ip netns exec ns1 ipvsadm -a -i -t ${vip_v4}:${port} -r ${rip_v4}:${port} +ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1 + +ip netns exec ns2 modprobe ipip +ip netns exec ns2 ip link set tunl0 up +ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_ignore=1 +ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_announce=2 +ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0 +ip netns exec ns2 sysctl -qw net.ipv4.conf.lo.arp_ignore=1 +ip netns exec ns2 sysctl -qw net.ipv4.conf.lo.arp_announce=2 +ip netns exec ns2 sysctl -qw net.ipv4.conf.lo.rp_filter=0 +ip netns exec ns2 sysctl -qw net.ipv4.conf.tunl0.rp_filter=0 +ip netns exec ns2 sysctl -qw net.ipv4.conf.veth21.rp_filter=0 +ip netns exec ns2 ip addr add ${vip_v4}/32 dev lo:1 + +test_service +} + run_tests() { local errors= @@ -182,6 +209,12 @@ run_tests() { test_nat errors=$(( $errors + $? )) + echo "Testing Tunnel mode..." + cleanup + setup + test_tun + errors=$(( $errors + $? )) + return $errors } -- 1.8.3.1
[PATCH 2/3] selftests: netfilter: add ipvs nat test case
Test virtual server via NAT. Tested: # selftests: netfilter: ipvs.sh # Testing DR mode... # Testing NAT mode... # ipvs.sh: PASS Signed-off-by: Haishuang Yan --- tools/testing/selftests/netfilter/ipvs.sh | 22 +- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/netfilter/ipvs.sh b/tools/testing/selftests/netfilter/ipvs.sh index 15c386b..40058f9 100755 --- a/tools/testing/selftests/netfilter/ipvs.sh +++ b/tools/testing/selftests/netfilter/ipvs.sh @@ -153,20 +153,40 @@ test_dr() { test_service } +test_nat() { +ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0 + +ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1 +ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr +ip netns exec ns1 ipvsadm -a -m -t ${vip_v4}:${port} -r ${rip_v4}:${port} +ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1 + +ip netns exec ns2 ip link del veth20 +ip netns exec ns2 ip route add default via ${dip_v4} dev veth21 + +test_service +} + run_tests() { local errors= echo "Testing DR mode..." + cleanup setup test_dr errors=$(( $errors + $? )) + echo "Testing NAT mode..." + cleanup + setup + test_nat + errors=$(( $errors + $? )) + return $errors } trap cleanup EXIT -cleanup run_tests if [ $? -ne 0 ]; then -- 1.8.3.1
[PATCH v2 0/2] ipvs: speedup ipvs netns dismantle
Implement the exit_batch() method to dismantle more ipvs netns per round. Tested: $ cat add_del_unshare.sh #!/bin/bash for i in `seq 1 100` do (for j in `seq 1 40` ; do unshare -n ipvsadm -A -t 172.16.$i.$j:80 >/dev/null ; done) & done wait; grep net_namespace /proc/slabinfo Before patch: $ time sh add_del_unshare.sh net_namespace 4020 4020 4736 6 8 : tunables 0 0 0 : slabdata 670 670 0 real 0m8.086s user 0m2.025s sys 0m36.956s After patch: $ time sh add_del_unshare.sh net_namespace 4020 4020 4736 6 8 : tunables 0 0 0 : slabdata 670 670 0 real 0m7.623s user 0m2.003s sys 0m32.935s Haishuang Yan (2): ipvs: batch __ip_vs_cleanup ipvs: batch __ip_vs_dev_cleanup include/net/ip_vs.h | 2 +- net/netfilter/ipvs/ip_vs_core.c | 47 - net/netfilter/ipvs/ip_vs_ctl.c | 12 --- 3 files changed, 38 insertions(+), 23 deletions(-) -- 1.8.3.1
[PATCH v2 1/2] ipvs: batch __ip_vs_cleanup
It's better to batch __ip_vs_cleanup to speedup ipvs connections dismantle. Signed-off-by: Haishuang Yan --- v2: remove unused pointer list --- include/net/ip_vs.h | 2 +- net/netfilter/ipvs/ip_vs_core.c | 28 net/netfilter/ipvs/ip_vs_ctl.c | 12 +--- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 3759167..93e7a25 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1324,7 +1324,7 @@ static inline void ip_vs_control_del(struct ip_vs_conn *cp) void ip_vs_control_net_cleanup(struct netns_ipvs *ipvs); void ip_vs_estimator_net_cleanup(struct netns_ipvs *ipvs); void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs); -void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs); +void ip_vs_service_nets_cleanup(struct list_head *net_list); /* IPVS application functions * (from ip_vs_app.c) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 8b80ab7..93cfb47 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -2402,18 +2402,22 @@ static int __net_init __ip_vs_init(struct net *net) return -ENOMEM; } -static void __net_exit __ip_vs_cleanup(struct net *net) +static void __net_exit __ip_vs_cleanup_batch(struct list_head *net_list) { - struct netns_ipvs *ipvs = net_ipvs(net); - - ip_vs_service_net_cleanup(ipvs);/* ip_vs_flush() with locks */ - ip_vs_conn_net_cleanup(ipvs); - ip_vs_app_net_cleanup(ipvs); - ip_vs_protocol_net_cleanup(ipvs); - ip_vs_control_net_cleanup(ipvs); - ip_vs_estimator_net_cleanup(ipvs); - IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen); - net->ipvs = NULL; + struct netns_ipvs *ipvs; + struct net *net; + + ip_vs_service_nets_cleanup(net_list); /* ip_vs_flush() with locks */ + list_for_each_entry(net, net_list, exit_list) { + ipvs = net_ipvs(net); + ip_vs_conn_net_cleanup(ipvs); + ip_vs_app_net_cleanup(ipvs); + ip_vs_protocol_net_cleanup(ipvs); + ip_vs_control_net_cleanup(ipvs); + ip_vs_estimator_net_cleanup(ipvs); + 
IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen); + net->ipvs = NULL; + } } static int __net_init __ip_vs_dev_init(struct net *net) @@ -2442,7 +2446,7 @@ static void __net_exit __ip_vs_dev_cleanup(struct net *net) static struct pernet_operations ipvs_core_ops = { .init = __ip_vs_init, - .exit = __ip_vs_cleanup, + .exit_batch = __ip_vs_cleanup_batch, .id = &ip_vs_net_id, .size = sizeof(struct netns_ipvs), }; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 8b48e7c..153c77b 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1607,14 +1607,20 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup) /* * Delete service by {netns} in the service table. - * Called by __ip_vs_cleanup() + * Called by __ip_vs_batch_cleanup() */ -void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs) +void ip_vs_service_nets_cleanup(struct list_head *net_list) { + struct netns_ipvs *ipvs; + struct net *net; + EnterFunction(2); /* Check for "full" addressed entries */ mutex_lock(&__ip_vs_mutex); - ip_vs_flush(ipvs, true); + list_for_each_entry(net, net_list, exit_list) { + ipvs = net_ipvs(net); + ip_vs_flush(ipvs, true); + } mutex_unlock(&__ip_vs_mutex); LeaveFunction(2); } -- 1.8.3.1
[PATCH v2 2/2] ipvs: batch __ip_vs_dev_cleanup
It's better to batch __ip_vs_dev_cleanup to speed up the dismantling of ipvs devices. Signed-off-by: Haishuang Yan --- v2: remove unused pointer list --- net/netfilter/ipvs/ip_vs_core.c | 19 --- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 93cfb47..512259f 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -2433,14 +2433,19 @@ static int __net_init __ip_vs_dev_init(struct net *net) return ret; } -static void __net_exit __ip_vs_dev_cleanup(struct net *net) +static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list) { - struct netns_ipvs *ipvs = net_ipvs(net); + struct netns_ipvs *ipvs; + struct net *net; + EnterFunction(2); - nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); - ipvs->enable = 0; /* Disable packet reception */ - smp_wmb(); - ip_vs_sync_net_cleanup(ipvs); + list_for_each_entry(net, net_list, exit_list) { + ipvs = net_ipvs(net); + nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); + ipvs->enable = 0; /* Disable packet reception */ + smp_wmb(); + ip_vs_sync_net_cleanup(ipvs); + } LeaveFunction(2); } @@ -2453,7 +2458,7 @@ static void __net_exit __ip_vs_dev_cleanup(struct net *net) static struct pernet_operations ipvs_core_dev_ops = { .init = __ip_vs_dev_init, - .exit = __ip_vs_dev_cleanup, + .exit_batch = __ip_vs_dev_cleanup_batch, }; /* -- 1.8.3.1
[PATCH v2 2/3] selftests: netfilter: add ipvs nat test case
Test virtual server via NAT. Tested: # selftests: netfilter: ipvs.sh # Testing DR mode... # Testing NAT mode... # ipvs.sh: PASS Signed-off-by: Haishuang Yan --- tools/testing/selftests/netfilter/ipvs.sh | 22 +- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/netfilter/ipvs.sh b/tools/testing/selftests/netfilter/ipvs.sh index 658c06b..e95453b 100755 --- a/tools/testing/selftests/netfilter/ipvs.sh +++ b/tools/testing/selftests/netfilter/ipvs.sh @@ -160,20 +160,40 @@ test_dr() { test_service } +test_nat() { +ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0 + +ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1 +ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr +ip netns exec ns1 ipvsadm -a -m -t ${vip_v4}:${port} -r ${rip_v4}:${port} +ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1 + +ip netns exec ns2 ip link del veth20 +ip netns exec ns2 ip route add default via ${dip_v4} dev veth21 + +test_service +} + run_tests() { local errors= echo "Testing DR mode..." + cleanup setup test_dr errors=$(( $errors + $? )) + echo "Testing NAT mode..." + cleanup + setup + test_nat + errors=$(( $errors + $? )) + return $errors } trap cleanup EXIT -cleanup run_tests if [ $? -ne 0 ]; then -- 1.8.3.1
[PATCH v2 0/3] selftests: netfilter: introduce test cases for ipvs
This patch series includes test cases for ipvs. The test topology is shown below: +--+ | | | | ns0 | ns1 | | --- | ------| | | veth01 | - | veth10 || veth12 || | ---peer ------| | | || | | --- || | | | br0| |- peer |--| | --- || | | | || | | -- peer -- --- | | | veth02 | - | veth20 | | veth21 | | | -- | -- --- | | | ns2 | | | | +--+ Test results: # selftests: netfilter: ipvs.sh # Testing DR mode... # Testing NAT mode... # Testing Tunnel mode... # ipvs.sh: PASS ok 6 selftests: netfilter: ipvs.sh Haishuang Yan (3): selftests: netfilter: add ipvs test script selftests: netfilter: add ipvs nat test case selftests: netfilter: add ipvs tunnel test case tools/testing/selftests/netfilter/Makefile | 2 +- tools/testing/selftests/netfilter/ipvs.sh | 234 + 2 files changed, 235 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/netfilter/ipvs.sh -- 1.8.3.1
[PATCH v2 3/3] selftests: netfilter: add ipvs tunnel test case
Test virtual server via ipip tunnel. Tested: # selftests: netfilter: ipvs.sh # Testing DR mode... # Testing NAT mode... # Testing Tunnel mode... # ipvs.sh: PASS ok 6 selftests: netfilter: ipvs.sh Signed-off-by: Haishuang Yan --- v2: optimize test script --- tools/testing/selftests/netfilter/ipvs.sh | 30 ++ 1 file changed, 30 insertions(+) diff --git a/tools/testing/selftests/netfilter/ipvs.sh b/tools/testing/selftests/netfilter/ipvs.sh index e95453b..b09994e 100755 --- a/tools/testing/selftests/netfilter/ipvs.sh +++ b/tools/testing/selftests/netfilter/ipvs.sh @@ -174,6 +174,30 @@ test_nat() { test_service } +test_tun() { +ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0 + +ip netns exec ns1 modprobe ipip +ip netns exec ns1 ip link set tunl0 up +ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=0 +ip netns exec ns1 sysctl -qw net.ipv4.conf.all.send_redirects=0 +ip netns exec ns1 sysctl -qw net.ipv4.conf.default.send_redirects=0 +ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr +ip netns exec ns1 ipvsadm -a -i -t ${vip_v4}:${port} -r ${rip_v4}:${port} +ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1 + +ip netns exec ns2 modprobe ipip +ip netns exec ns2 ip link set tunl0 up +ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_ignore=1 +ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_announce=2 +ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0 +ip netns exec ns2 sysctl -qw net.ipv4.conf.tunl0.rp_filter=0 +ip netns exec ns2 sysctl -qw net.ipv4.conf.veth21.rp_filter=0 +ip netns exec ns2 ip addr add ${vip_v4}/32 dev lo:1 + +test_service +} + run_tests() { local errors= @@ -189,6 +213,12 @@ run_tests() { test_nat errors=$(( $errors + $? )) + echo "Testing Tunnel mode..." + cleanup + setup + test_tun + errors=$(( $errors + $? )) + return $errors } -- 1.8.3.1
[PATCH v2 1/3] selftests: netfilter: add ipvs test script
Test virtual server via direct routing for IPv4. Tested: # selftests: netfilter: ipvs.sh # Testing DR mode... # ipvs.sh: PASS ok 6 selftests: netfilter: ipvs.sh Signed-off-by: Haishuang Yan --- v2: optimize test script --- tools/testing/selftests/netfilter/Makefile | 2 +- tools/testing/selftests/netfilter/ipvs.sh | 184 + 2 files changed, 185 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/netfilter/ipvs.sh diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 4144984..de1032b 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -2,6 +2,6 @@ # Makefile for netfilter selftests TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ - conntrack_icmp_related.sh nft_flowtable.sh + conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh include ../lib.mk diff --git a/tools/testing/selftests/netfilter/ipvs.sh b/tools/testing/selftests/netfilter/ipvs.sh new file mode 100755 index 000..658c06b --- /dev/null +++ b/tools/testing/selftests/netfilter/ipvs.sh @@ -0,0 +1,184 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# End-to-end ipvs test suite +# Topology: +#--+ +# | | +# ns0 | ns1 | +# --- | ------| +# | veth01 | - | veth10 || veth12 || +# ---peer ------| +# | || | +# --- || | +# | br0| |- peer |--| +# --- || | +# | || | +# -- peer -- --- | +# | veth02 | - | veth20 | | veth21 | | +# -- | -- --- | +# | ns2 | +# | | +#--+ +# +# We assume that all network driver are loaded +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 +GREEN='\033[0;92m' +RED='\033[0;31m' +NC='\033[0m' # No Color + +readonly port=8080 + +readonly vip_v4=207.175.44.110 +readonly cip_v4=10.0.0.2 +readonly gip_v4=10.0.0.1 +readonly dip_v4=172.16.0.1 +readonly rip_v4=172.16.0.2 +readonly sip_v4=10.0.0.3 + +readonly infile="$(mktemp)" +readonly outfile="$(mktemp)" + +sysipvsnet=/proc/sys/net/ipv4/vs/ +if [ ! 
-d /proc/sys/net/ipv4/vs/ ]; then +modprobe -q ip_vs +if [ $? -ne 0 ]; then +echo "SKIP: Could not run test without ipvs module" + exit $ksft_skip +fi +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ipvsadm -v > /dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "SKIP: Could not run test without ipvsadm" + exit $ksft_skip +fi + +nc --version > /dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "SKIP: Could not run test without ncat" + exit $ksft_skip +fi + +setup() { +ip netns add ns0 +ip netns add ns1 +ip netns add ns2 + +ip link add veth01 netns ns0 type veth peer name veth10 netns ns1 +ip link add veth02 netns ns0 type veth peer name veth20 netns ns2 +ip link add veth12 netns ns1 type veth peer name veth21 netns ns2 + +ip netns exec ns0 ip link set veth01 up +ip netns exec ns0 ip link set veth02 up +ip netns exec ns0 ip link add br0 type bridge +ip netns exec ns0 ip link set veth01 master br0 +ip netns exec ns0 ip link set veth02 master br0 +ip netns exec ns0 ip link set br0 up +ip netns exec ns0 ip addr add ${cip_v4}/24 dev br0 + +ip netns exec ns1 ip link set lo up +ip netns exec ns1 ip link set veth10 up +ip netns exec ns1 ip addr add ${gip_v4}/24 dev veth10 +ip netns exec ns1 ip link set veth12 up +ip netns exec ns1 ip addr add ${dip_v4}/24 dev veth12 + +ip netns exec ns2 ip link set lo up +ip netns exec ns2 ip link set veth21 up +ip netns exec ns2 ip addr add ${rip_v4}/24 dev veth21 +ip netns exec ns2 ip link set veth20 up +ip netns exec ns2 ip addr add ${sip_v4}/24 dev veth20 +} + +cleanup() { +for i in 0 1 2 +do + ip netns del ns$i > /dev/null 2>&1 +done +pkill nc +} + +server_listen() { + ip netns exec ns2 nc -l -p 8080 > "${outfile}" & + server_pid=$! +
[PATCH] erspan: remove the incorrect mtu limit for erspan
erspan driver calls ether_setup(), after commit 61e84623ace3 ("net: centralize net_device min/max MTU checking"), the range of mtu is [min_mtu, max_mtu], which is [68, 1500] by default. It causes the dev mtu of the erspan device to not be greater than 1500, this limit value is not correct for ipgre tap device. Tested: Before patch: # ip link set erspan0 mtu 1600 Error: mtu greater than device maximum. After patch: # ip link set erspan0 mtu 1600 # ip -d link show erspan0 21: erspan0@NONE: mtu 1600 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 0 Fixes: 61e84623ace3 ("net: centralize net_device min/max MTU checking") Signed-off-by: Haishuang Yan --- net/ipv4/ip_gre.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index a53a543..52690bb 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1446,6 +1446,7 @@ static void erspan_setup(struct net_device *dev) struct ip_tunnel *t = netdev_priv(dev); ether_setup(dev); + dev->max_mtu = 0; dev->netdev_ops = &erspan_netdev_ops; dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; -- 1.8.3.1
Re: [net-next 1/2] ipvs: batch __ip_vs_cleanup
> On 2019年7月16日, at 上午4:39, Julian Anastasov wrote: > > > Hello, > > On Sat, 13 Jul 2019, Haishuang Yan wrote: > >> It's better to batch __ip_vs_cleanup to speedup ipvs >> connections dismantle. >> >> Signed-off-by: Haishuang Yan >> --- >> include/net/ip_vs.h | 2 +- >> net/netfilter/ipvs/ip_vs_core.c | 29 + >> net/netfilter/ipvs/ip_vs_ctl.c | 13 ++--- >> 3 files changed, 28 insertions(+), 16 deletions(-) >> >> diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h >> index 3759167..93e7a25 100644 >> --- a/include/net/ip_vs.h >> +++ b/include/net/ip_vs.h >> @@ -1324,7 +1324,7 @@ static inline void ip_vs_control_del(struct ip_vs_conn >> *cp) >> void ip_vs_control_net_cleanup(struct netns_ipvs *ipvs); >> void ip_vs_estimator_net_cleanup(struct netns_ipvs *ipvs); >> void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs); >> -void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs); >> +void ip_vs_service_nets_cleanup(struct list_head *net_list); >> >> /* IPVS application functions >> * (from ip_vs_app.c) >> diff --git a/net/netfilter/ipvs/ip_vs_core.c >> b/net/netfilter/ipvs/ip_vs_core.c >> index 46f06f9..b4d79b7 100644 >> --- a/net/netfilter/ipvs/ip_vs_core.c >> +++ b/net/netfilter/ipvs/ip_vs_core.c >> @@ -2402,18 +2402,23 @@ static int __net_init __ip_vs_init(struct net *net) >> return -ENOMEM; >> } >> >> -static void __net_exit __ip_vs_cleanup(struct net *net) >> +static void __net_exit __ip_vs_cleanup_batch(struct list_head *net_list) >> { >> -struct netns_ipvs *ipvs = net_ipvs(net); >> - >> -ip_vs_service_net_cleanup(ipvs);/* ip_vs_flush() with locks */ >> -ip_vs_conn_net_cleanup(ipvs); >> -ip_vs_app_net_cleanup(ipvs); >> -ip_vs_protocol_net_cleanup(ipvs); >> -ip_vs_control_net_cleanup(ipvs); >> -ip_vs_estimator_net_cleanup(ipvs); >> -IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen); >> -net->ipvs = NULL; >> +struct netns_ipvs *ipvs; >> +struct net *net; >> +LIST_HEAD(list); >> + >> +ip_vs_service_nets_cleanup(net_list); /* ip_vs_flush() with locks */ >> 
+list_for_each_entry(net, net_list, exit_list) { > > How much faster is to replace list_for_each_entry in > ops_exit_list() with this one. IPVS can waste time in calls > such as kthread_stop() and del_timer_sync() but I'm not sure > we can solve it easily. What gain do you see in benchmarks? Hi, As the following benchmark testing results show, there is a little performance improvement: $ cat add_del_unshare.sh #!/bin/bash for i in `seq 1 100` do (for j in `seq 1 40` ; do unshare -n ipvsadm -A -t 172.16.$i.$j:80 >/dev/null ; done) & done wait; grep net_namespace /proc/slabinfo Before patch: $ time sh add_del_unshare.sh net_namespace 4020 4020 473668 : tunables000 : slabdata670670 0 real0m8.086s user0m2.025s sys 0m36.956s After patch: $ time sh add_del_unshare.sh net_namespace 4020 4020 473668 : tunables000 : slabdata670670 0 real0m7.623s user0m2.003s sys 0m32.935s > >> +ipvs = net_ipvs(net); >> +ip_vs_conn_net_cleanup(ipvs); >> +ip_vs_app_net_cleanup(ipvs); >> +ip_vs_protocol_net_cleanup(ipvs); >> +ip_vs_control_net_cleanup(ipvs); >> +ip_vs_estimator_net_cleanup(ipvs); >> +IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen); >> +net->ipvs = NULL; >> +} >> } > > Regards > > -- > Julian Anastasov >
[PATCH] openvswitch: Fix a possible memory leak on dst_cache
dst_cache should be destroyed when adding flow actions fails. Fixes: d71785ffc7e7 ("net: add dst_cache to ovs vxlan lwtunnel") Signed-off-by: Haishuang Yan --- net/openvswitch/flow_netlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index d7559c6..1fd1cdd 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2608,6 +2608,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, sizeof(*ovs_tun), log); if (IS_ERR(a)) { dst_release((struct dst_entry *)tun_dst); + dst_cache_destroy(&tun_dst->u.tun_info.dst_cache); return PTR_ERR(a); } -- 1.8.3.1
[PATCH v6 1/3] selftests: netfilter: add ipvs test script
Test virtual server via direct routing for IPv4. Tested: # selftests: netfilter: ipvs.sh # Testing DR mode... # ipvs.sh: PASS ok 6 selftests: netfilter: ipvs.sh Signed-off-by: Haishuang Yan --- v6: use #!/bin/sh v5: use cmp to compare two file contents suggested by Simon Horman v4: use #!/bin/bash -p suggested by Duncan Roe v3: use bash style v2: optimize test script --- tools/testing/selftests/netfilter/Makefile | 2 +- tools/testing/selftests/netfilter/ipvs.sh | 178 + 2 files changed, 179 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/netfilter/ipvs.sh diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 4144984..de1032b 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -2,6 +2,6 @@ # Makefile for netfilter selftests TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ - conntrack_icmp_related.sh nft_flowtable.sh + conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh include ../lib.mk diff --git a/tools/testing/selftests/netfilter/ipvs.sh b/tools/testing/selftests/netfilter/ipvs.sh new file mode 100755 index 000..3d11d87 --- /dev/null +++ b/tools/testing/selftests/netfilter/ipvs.sh @@ -0,0 +1,178 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# End-to-end ipvs test suite +# Topology: +#--+ +# | | +# ns0 | ns1 | +# --- | ------| +# | veth01 | - | veth10 || veth12 || +# ---peer ------| +# | || | +# --- || | +# | br0| |- peer |--| +# --- || | +# | || | +# -- peer -- --- | +# | veth02 | - | veth20 | | veth21 | | +# -- | -- --- | +# | ns2 | +# | | +#--+ +# +# We assume that all network driver are loaded +# + +# Kselftest framework requirement - SKIP code is 4. 
+ksft_skip=4 +ret=0 +GREEN='\033[0;92m' +RED='\033[0;31m' +NC='\033[0m' # No Color + +readonly port=8080 + +readonly vip_v4=207.175.44.110 +readonly cip_v4=10.0.0.2 +readonly gip_v4=10.0.0.1 +readonly dip_v4=172.16.0.1 +readonly rip_v4=172.16.0.2 +readonly sip_v4=10.0.0.3 + +readonly infile="$(mktemp)" +readonly outfile="$(mktemp)" +readonly datalen=32 + +sysipvsnet="/proc/sys/net/ipv4/vs/" +if [ ! -d $sysipvsnet ]; then + modprobe -q ip_vs + if [ $? -ne 0 ]; then + echo "skip: could not run test without ipvs module" + exit $ksft_skip + fi +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ipvsadm -v > /dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "SKIP: Could not run test without ipvsadm" + exit $ksft_skip +fi + +setup() { + ip netns add ns0 + ip netns add ns1 + ip netns add ns2 + + ip link add veth01 netns ns0 type veth peer name veth10 netns ns1 + ip link add veth02 netns ns0 type veth peer name veth20 netns ns2 + ip link add veth12 netns ns1 type veth peer name veth21 netns ns2 + + ip netns exec ns0 ip link set veth01 up + ip netns exec ns0 ip link set veth02 up + ip netns exec ns0 ip link add br0 type bridge + ip netns exec ns0 ip link set veth01 master br0 + ip netns exec ns0 ip link set veth02 master br0 + ip netns exec ns0 ip link set br0 up + ip netns exec ns0 ip addr add ${cip_v4}/24 dev br0 + + ip netns exec ns1 ip link set lo up + ip netns exec ns1 ip link set veth10 up + ip netns exec ns1 ip addr add ${gip_v4}/24 dev veth10 + ip netns exec ns1 ip link set veth12 up + ip netns exec ns1 ip addr add ${dip_v4}/24 dev veth12 + + ip netns exec ns2 ip link set lo up + ip netns exec ns2 ip link set veth21 up + ip netns exec ns2 ip addr add ${rip_v4}/24 dev veth21 + ip netns exec ns2 ip link set veth20 up + ip netns exec ns2 ip addr add ${sip_v4}/24 dev veth20 + + sleep 1 + + dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none +} + +cleanup() { +
[PATCH v6 2/3] selftests: netfilter: add ipvs nat test case
Test virtual server via NAT. Tested: # selftests: netfilter: ipvs.sh # Testing DR mode... # Testing NAT mode... # ipvs.sh: PASS Signed-off-by: Haishuang Yan --- tools/testing/selftests/netfilter/ipvs.sh | 22 +- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/netfilter/ipvs.sh b/tools/testing/selftests/netfilter/ipvs.sh index 3d11d87..8b2e618 100755 --- a/tools/testing/selftests/netfilter/ipvs.sh +++ b/tools/testing/selftests/netfilter/ipvs.sh @@ -154,20 +154,40 @@ test_dr() { test_service } +test_nat() { + ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0 + + ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1 + ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr + ip netns exec ns1 ipvsadm -a -m -t ${vip_v4}:${port} -r ${rip_v4}:${port} + ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1 + + ip netns exec ns2 ip link del veth20 + ip netns exec ns2 ip route add default via ${dip_v4} dev veth21 + + test_service +} + run_tests() { local errors= echo "Testing DR mode..." + cleanup setup test_dr errors=$(( $errors + $? )) + echo "Testing NAT mode..." + cleanup + setup + test_nat + errors=$(( $errors + $? )) + return $errors } trap cleanup EXIT -cleanup run_tests if [ $? -ne 0 ]; then -- 1.8.3.1
[PATCH v6 3/3] selftests: netfilter: add ipvs tunnel test case
Test virtual server via ipip tunnel. Tested: # selftests: netfilter: ipvs.sh # Testing DR mode... # Testing NAT mode... # Testing Tunnel mode... # ipvs.sh: PASS ok 6 selftests: netfilter: ipvs.sh Signed-off-by: Haishuang Yan --- v2: optimize test script --- tools/testing/selftests/netfilter/ipvs.sh | 30 ++ 1 file changed, 30 insertions(+) diff --git a/tools/testing/selftests/netfilter/ipvs.sh b/tools/testing/selftests/netfilter/ipvs.sh index 8b2e618..c3b8f90 100755 --- a/tools/testing/selftests/netfilter/ipvs.sh +++ b/tools/testing/selftests/netfilter/ipvs.sh @@ -168,6 +168,30 @@ test_nat() { test_service } +test_tun() { + ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0 + + ip netns exec ns1 modprobe ipip + ip netns exec ns1 ip link set tunl0 up + ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=0 + ip netns exec ns1 sysctl -qw net.ipv4.conf.all.send_redirects=0 + ip netns exec ns1 sysctl -qw net.ipv4.conf.default.send_redirects=0 + ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr + ip netns exec ns1 ipvsadm -a -i -t ${vip_v4}:${port} -r ${rip_v4}:${port} + ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1 + + ip netns exec ns2 modprobe ipip + ip netns exec ns2 ip link set tunl0 up + ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_ignore=1 + ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_announce=2 + ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0 + ip netns exec ns2 sysctl -qw net.ipv4.conf.tunl0.rp_filter=0 + ip netns exec ns2 sysctl -qw net.ipv4.conf.veth21.rp_filter=0 + ip netns exec ns2 ip addr add ${vip_v4}/32 dev lo:1 + + test_service +} + run_tests() { local errors= @@ -183,6 +207,12 @@ run_tests() { test_nat errors=$(( $errors + $? )) + echo "Testing Tunnel mode..." + cleanup + setup + test_tun + errors=$(( $errors + $? )) + return $errors } -- 1.8.3.1
[PATCH v6 0/3] selftests: netfilter: introduce test cases for ipvs
This patch series includes test cases for ipvs. The test topology is shown below: +--+ | | | | ns0 | ns1 | | --- | ------| | | veth01 | - | veth10 || veth12 || | ---peer ------| | | || | | --- || | | | br0| |- peer |--| | --- || | | | || | | -- peer -- --- | | | veth02 | - | veth20 | | veth21 | | | -- | -- --- | | | ns2 | | | | +--+ Test results: # selftests: netfilter: ipvs.sh # Testing DR mode... # Testing NAT mode... # Testing Tunnel mode... # ipvs.sh: PASS ok 6 selftests: netfilter: ipvs.sh Signed-off-by: Haishuang Yan Haishuang Yan (3): selftests: netfilter: add ipvs test script selftests: netfilter: add ipvs nat test case selftests: netfilter: add ipvs tunnel test case tools/testing/selftests/netfilter/Makefile | 2 +- tools/testing/selftests/netfilter/ipvs.sh | 228 + 2 files changed, 229 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/netfilter/ipvs.sh -- 1.8.3.1
[net-next 1/2] ipvs: batch __ip_vs_cleanup
It's better to batch __ip_vs_cleanup to speedup ipvs connections dismantle. Signed-off-by: Haishuang Yan --- include/net/ip_vs.h | 2 +- net/netfilter/ipvs/ip_vs_core.c | 29 + net/netfilter/ipvs/ip_vs_ctl.c | 13 ++--- 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 3759167..93e7a25 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1324,7 +1324,7 @@ static inline void ip_vs_control_del(struct ip_vs_conn *cp) void ip_vs_control_net_cleanup(struct netns_ipvs *ipvs); void ip_vs_estimator_net_cleanup(struct netns_ipvs *ipvs); void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs); -void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs); +void ip_vs_service_nets_cleanup(struct list_head *net_list); /* IPVS application functions * (from ip_vs_app.c) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 46f06f9..b4d79b7 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -2402,18 +2402,23 @@ static int __net_init __ip_vs_init(struct net *net) return -ENOMEM; } -static void __net_exit __ip_vs_cleanup(struct net *net) +static void __net_exit __ip_vs_cleanup_batch(struct list_head *net_list) { - struct netns_ipvs *ipvs = net_ipvs(net); - - ip_vs_service_net_cleanup(ipvs);/* ip_vs_flush() with locks */ - ip_vs_conn_net_cleanup(ipvs); - ip_vs_app_net_cleanup(ipvs); - ip_vs_protocol_net_cleanup(ipvs); - ip_vs_control_net_cleanup(ipvs); - ip_vs_estimator_net_cleanup(ipvs); - IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen); - net->ipvs = NULL; + struct netns_ipvs *ipvs; + struct net *net; + LIST_HEAD(list); + + ip_vs_service_nets_cleanup(net_list); /* ip_vs_flush() with locks */ + list_for_each_entry(net, net_list, exit_list) { + ipvs = net_ipvs(net); + ip_vs_conn_net_cleanup(ipvs); + ip_vs_app_net_cleanup(ipvs); + ip_vs_protocol_net_cleanup(ipvs); + ip_vs_control_net_cleanup(ipvs); + ip_vs_estimator_net_cleanup(ipvs); + IP_VS_DBG(2, "ipvs 
netns %d released\n", ipvs->gen); + net->ipvs = NULL; + } } static int __net_init __ip_vs_dev_init(struct net *net) @@ -2442,7 +2447,7 @@ static void __net_exit __ip_vs_dev_cleanup(struct net *net) static struct pernet_operations ipvs_core_ops = { .init = __ip_vs_init, - .exit = __ip_vs_cleanup, + .exit_batch = __ip_vs_cleanup_batch, .id = &ip_vs_net_id, .size = sizeof(struct netns_ipvs), }; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 07e0967..c8e652b 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1607,14 +1607,21 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup) /* * Delete service by {netns} in the service table. - * Called by __ip_vs_cleanup() + * Called by __ip_vs_batch_cleanup() */ -void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs) +void ip_vs_service_nets_cleanup(struct list_head *net_list) { + struct netns_ipvs *ipvs; + struct net *net; + LIST_HEAD(list); + EnterFunction(2); /* Check for "full" addressed entries */ mutex_lock(&__ip_vs_mutex); - ip_vs_flush(ipvs, true); + list_for_each_entry(net, net_list, exit_list) { + ipvs = net_ipvs(net); + ip_vs_flush(ipvs, true); + } mutex_unlock(&__ip_vs_mutex); LeaveFunction(2); } -- 1.8.3.1
[net-next 2/2] ipvs: batch __ip_vs_dev_cleanup
It's better to batch __ip_vs_dev_cleanup to speed up ipvs device dismantle. Signed-off-by: Haishuang Yan --- net/netfilter/ipvs/ip_vs_core.c | 20 +--- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index b4d79b7..58af24a 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -2434,14 +2434,20 @@ static int __net_init __ip_vs_dev_init(struct net *net) return ret; } -static void __net_exit __ip_vs_dev_cleanup(struct net *net) +static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list) { - struct netns_ipvs *ipvs = net_ipvs(net); + struct netns_ipvs *ipvs; + struct net *net; + LIST_HEAD(list); + EnterFunction(2); - nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); - ipvs->enable = 0; /* Disable packet reception */ - smp_wmb(); - ip_vs_sync_net_cleanup(ipvs); + list_for_each_entry(net, net_list, exit_list) { + ipvs = net_ipvs(net); + nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); + ipvs->enable = 0; /* Disable packet reception */ + smp_wmb(); + ip_vs_sync_net_cleanup(ipvs); + } LeaveFunction(2); } @@ -2454,7 +2460,7 @@ static void __net_exit __ip_vs_dev_cleanup(struct net *net) static struct pernet_operations ipvs_core_dev_ops = { .init = __ip_vs_dev_init, - .exit = __ip_vs_dev_cleanup, + .exit_batch = __ip_vs_dev_cleanup_batch, }; /* -- 1.8.3.1
[net-next 0/2] ipvs: speedup ipvs netns dismantle
Implement exit_batch() method to dismantle more ipvs netns per round. Haishuang Yan (2): ipvs: batch __ip_vs_cleanup ipvs: batch __ip_vs_dev_cleanup include/net/ip_vs.h | 2 +- net/netfilter/ipvs/ip_vs_core.c | 49 + net/netfilter/ipvs/ip_vs_ctl.c | 13 --- 3 files changed, 41 insertions(+), 23 deletions(-) -- 1.8.3.1
[PATCH] sit: use dst_cache in ipip6_tunnel_xmit
Same as other ip tunnel, use dst_cache in xmit action to avoid unnecessary fib lookups. Signed-off-by: Haishuang Yan --- net/ipv6/sit.c | 13 + 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 8061089..b2ccbc4 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -900,12 +900,17 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, RT_TOS(tos), RT_SCOPE_UNIVERSE, IPPROTO_IPV6, 0, dst, tiph->saddr, 0, 0, sock_net_uid(tunnel->net, NULL)); - rt = ip_route_output_flow(tunnel->net, &fl4, NULL); - if (IS_ERR(rt)) { - dev->stats.tx_carrier_errors++; - goto tx_error_icmp; + rt = dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr); + if (!rt) { + rt = ip_route_output_flow(tunnel->net, &fl4, NULL); + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error_icmp; + } + dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, fl4.saddr); } + if (rt->rt_type != RTN_UNICAST) { ip_rt_put(rt); dev->stats.tx_carrier_errors++; -- 1.8.3.1
[PATCH] ipip: validate header length in ipip_tunnel_xmit
We need the same checks introduced by commit cb9f1b783850 ("ip: validate header length on virtual device xmit") for ipip tunnel. Signed-off-by: Haishuang Yan --- net/ipv4/ipip.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 43adfc1..2f01cf6 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -275,6 +275,9 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, const struct iphdr *tiph = &tunnel->parms.iph; u8 ipproto; + if (!pskb_inet_may_pull(skb)) + goto tx_error; + switch (skb->protocol) { case htons(ETH_P_IP): ipproto = IPPROTO_IPIP; -- 1.8.3.1
[PATCH] ipip: validate header length in ipip_tunnel_xmit
We need the same checks introduced by commit cb9f1b783850 ("ip: validate header length on virtual device xmit") for ipip tunnel. Signed-off-by: Haishuang Yan --- net/ipv4/ipip.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 43adfc1..2f01cf6 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -275,6 +275,9 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, const struct iphdr *tiph = &tunnel->parms.iph; u8 ipproto; + if (!pskb_inet_may_pull(skb)) + goto tx_error; + switch (skb->protocol) { case htons(ETH_P_IP): ipproto = IPPROTO_IPIP; -- 1.8.3.1
[PATCH] ip6_tunnel: fix possible use-after-free on xmit
ip4ip6/ip6ip6 tunnels run iptunnel_handle_offloads on xmit which can cause a possible use-after-free accessing iph/ipv6h pointer since the packet will be 'uncloned' running pskb_expand_head if it is a cloned gso skb. Fixes: 0e9a709560db ("ip6_tunnel, ip6_gre: fix setting of DSCP on encapsulated packets") Signed-off-by: Haishuang Yan --- net/ipv6/ip6_tunnel.c | 6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 3134fbb..754a484 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1278,12 +1278,11 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, } fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph)); if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; - dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph)); - skb_set_inner_ipproto(skb, IPPROTO_IPIP); err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, @@ -1367,12 +1366,11 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, } fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h)); if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; - dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h)); - skb_set_inner_ipproto(skb, IPPROTO_IPV6); err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, -- 1.8.3.1
[PATCH] ip6_gre: reload ipv6h in prepare_ip6gre_xmit_ipv6
Since ip6_tnl_parse_tlv_enc_lim() can call pskb_may_pull(), which may change skb->data, we need to re-load ipv6h at the right place. Fixes: 898b29798e36 ("ip6_gre: Refactor ip6gre xmit codes") Cc: William Tu Signed-off-by: Haishuang Yan --- net/ipv6/ip6_gre.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index c2049c7..dd2d0b96 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -660,12 +660,13 @@ static int prepare_ip6gre_xmit_ipv6(struct sk_buff *skb, struct flowi6 *fl6, __u8 *dsfield, int *encap_limit) { - struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct ipv6hdr *ipv6h; struct ip6_tnl *t = netdev_priv(dev); __u16 offset; offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */ + ipv6h = ipv6_hdr(skb); if (offset > 0) { struct ipv6_tlv_tnl_enc_lim *tel; -- 1.8.3.1
Re: [PATCH] openvswitch: Fix a possible memory leak on dst_cache
> On 2019年7月19日, at 上午6:12, Gregory Rose wrote: > > On 7/18/2019 9:07 AM, Haishuang Yan wrote: >> dst_cache should be destroyed when fail to add flow actions. >> >> Fixes: d71785ffc7e7 ("net: add dst_cache to ovs vxlan lwtunnel") >> Signed-off-by: Haishuang Yan >> --- >> net/openvswitch/flow_netlink.c | 1 + >> 1 file changed, 1 insertion(+) >> >> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c >> index d7559c6..1fd1cdd 100644 >> --- a/net/openvswitch/flow_netlink.c >> +++ b/net/openvswitch/flow_netlink.c >> @@ -2608,6 +2608,7 @@ static int validate_and_copy_set_tun(const struct >> nlattr *attr, >> sizeof(*ovs_tun), log); >> if (IS_ERR(a)) { >> dst_release((struct dst_entry *)tun_dst); >> +dst_cache_destroy(&tun_dst->u.tun_info.dst_cache); >> return PTR_ERR(a); >> } >> > > Nack. > > dst_release will decrement the ref count and will call_rcu(&dst->rcu_head, > dst_destroy_rcu) if the ref count is zero. No other net drivers call > dst_destroy SFAICT. > > Haishuang, > > are you trying to fix some specific problem here? > > Thanks, > > - Greg > > Greg, You’re right, dst_cache would be freed in metadata_dst_free: 125 126 if (dst->flags & DST_METADATA) 127 metadata_dst_free((struct metadata_dst *)dst); 128 else 129 kmem_cache_free(dst->ops->kmem_cachep, dst); 130 I thought I encountered a memory leak, but it seems not to be an issue. Thanks for your explanation.
Re: [PATCH] ip6_gre: simplify gre header parsing in ip6gre_err
> On 2018年9月10日, at 下午11:36, Jiri Benc wrote: > > On Mon, 10 Sep 2018 16:25:09 +0800, Haishuang Yan wrote: >> +if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6), >> + offset) < 0) { >> +if (!csum_err) /* ignore csum errors. */ >> +return; >> } > > gre_parse_header stops parsing when csum_err is encountered. Which > means tpi.key is undefined... > >> >> -if (!pskb_may_pull(skb, offset + grehlen)) >> -return; >> ipv6h = (const struct ipv6hdr *)skb->data; >> -greh = (const struct gre_base_hdr *)(skb->data + offset); >> -key = key_off ? *(__be32 *)(skb->data + key_off) : 0; >> - >> t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr, >> - key, greh->protocol); >> + tpi.key, tpi.proto); > > ...and can't be used here. > > Jiri > You are right. Thanks for reviewing. So the same problem also arises in the ipgre_err code: 187 iph = (const struct iphdr *)(icmp_hdr(skb) + 1); 188 t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags, 189 iph->daddr, iph->saddr, tpi->key); Since csum_err may not be used outside, how about refactoring the gre_parse_header function like this: --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -86,7 +86,7 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, options = (__be32 *)(greh + 1); if (greh->flags & GRE_CSUM) { - if (skb_checksum_simple_validate(skb)) { + if (csum_err && skb_checksum_simple_validate(skb)) { *csum_err = true; return -EINVAL; } And in the gre_err function, we can call gre_parse_header(skb, &tpi, NULL, **) like this: --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -234,11 +234,9 @@ static void gre_err(struct sk_buff *skb, u32 info) struct tnl_ptk_info tpi; bool csum_err = false; - if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), -iph->ihl * 4) < 0) { - if (!csum_err) /* ignore csum errors. */ + if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP), +iph->ihl * 4) < 0) return; - }
[PATCH v2] geneve: fix max_mtu setting
For ipv6+udp+geneve encapsulation data, the max_mtu should subtract sizeof(ipv6hdr), instead of sizeof(iphdr). Signed-off-by: Haishuang Yan --- Changes in v2: - As suggested by Jesse Gross, treat AF_UNSPEC the same as AF_INET to avoid disallowing potentially valid configurations. --- drivers/net/geneve.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 310e0b9c..5de892f 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1036,12 +1036,17 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) static int __geneve_change_mtu(struct net_device *dev, int new_mtu, bool strict) { + struct geneve_dev *geneve = netdev_priv(dev); /* The max_mtu calculation does not take account of GENEVE * options, to avoid excluding potentially valid * configurations. */ - int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - sizeof(struct iphdr) - - dev->hard_header_len; + int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len; + + if (geneve->remote.sa.sa_family == AF_INET6) + max_mtu -= sizeof(struct ipv6hdr); + else + max_mtu -= sizeof(struct iphdr); if (new_mtu < 68) return -EINVAL; -- 1.8.3.1
[PATCH] sched, cgroup: enclose root_task_group with macro CONFIG_CGROUP_SCHED.
root_task_group defined in sched/core.c is enclosed by CONFIG_CGROUP_SCHED, so the export declaration should also be enclosed. Signed-off-by: Haishuang Yan --- include/linux/init_task.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 325f649..f3f73fa 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -132,9 +132,8 @@ extern struct cred init_cred; -extern struct task_group root_task_group; - #ifdef CONFIG_CGROUP_SCHED +extern struct task_group root_task_group; # define INIT_CGROUP_SCHED(tsk) \ .sched_task_group = &root_task_group, #else -- 1.8.3.1
[PATCH] geneve: fix ip_hdr_len reserved for geneve6 tunnel.
It should reserve sizeof(ipv6hdr) for geneve in ipv6 tunnel. Fixes: c3ef5aa5e5 ('geneve: Merge ipv4 and ipv6 geneve_build_skb()') Signed-off-by: Haishuang Yan --- drivers/net/geneve.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 7b80e28..45301cb 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -852,7 +852,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, ip_hdr(skb), skb); ttl = key->ttl ? : ip6_dst_hoplimit(dst); } - err = geneve_build_skb(dst, skb, info, xnet, sizeof(struct iphdr)); + err = geneve_build_skb(dst, skb, info, xnet, sizeof(struct ipv6hdr)); if (unlikely(err)) return err; -- 1.8.3.1
[PATCH] vxlan: fix a potential issue when create a new vxlan fdb entry.
vxlan_fdb_append may return error, so add the proper check, otherwise it will cause memory leak. Signed-off-by: Haishuang Yan --- drivers/net/vxlan.c | 10 +++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 21e92be..3b7b237 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -611,6 +611,7 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan, struct vxlan_rdst *rd = NULL; struct vxlan_fdb *f; int notify = 0; + int rc = 0; f = __vxlan_find_mac(vxlan, mac); if (f) { @@ -641,8 +642,7 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan, if ((flags & NLM_F_APPEND) && (is_multicast_ether_addr(f->eth_addr) || is_zero_ether_addr(f->eth_addr))) { - int rc = vxlan_fdb_append(f, ip, port, vni, ifindex, - &rd); + rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd); if (rc < 0) return rc; @@ -673,7 +673,11 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan, INIT_LIST_HEAD(&f->remotes); memcpy(f->eth_addr, mac, ETH_ALEN); - vxlan_fdb_append(f, ip, port, vni, ifindex, &rd); + rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd); + if (rc < 0) { + kfree(f); + return rc; + } ++vxlan->addrcnt; hlist_add_head_rcu(&f->hlist, -- 1.8.3.1
[PATCH] ipv4: Namespaceify tcp_tw_reuse knob
Signed-off-by: Haishuang Yan --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++--- net/ipv4/tcp_ipv4.c| 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index f0cf5a1..0378e88 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -110,6 +110,7 @@ struct netns_ipv4 { int sysctl_tcp_orphan_retries; int sysctl_tcp_fin_timeout; unsigned int sysctl_tcp_notsent_lowat; + int sysctl_tcp_tw_reuse; int sysctl_igmp_max_memberships; int sysctl_igmp_max_msf; diff --git a/include/net/tcp.h b/include/net/tcp.h index 207147b..6061963 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -252,7 +252,6 @@ extern int sysctl_tcp_rmem[3]; extern int sysctl_tcp_app_win; extern int sysctl_tcp_adv_win_scale; -extern int sysctl_tcp_tw_reuse; extern int sysctl_tcp_frto; extern int sysctl_tcp_low_latency; extern int sysctl_tcp_nometrics_save; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 80bc36b..22cbd61 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -433,13 +433,6 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write, .extra2 = &tcp_adv_win_scale_max, }, { - .procname = "tcp_tw_reuse", - .data = &sysctl_tcp_tw_reuse, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "tcp_frto", .data = &sysctl_tcp_frto, .maxlen = sizeof(int), @@ -960,6 +953,13 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write, .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "tcp_tw_reuse", + .data = &init_net.ipv4.sysctl_tcp_tw_reuse, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, #ifdef CONFIG_IP_ROUTE_MULTIPATH { .procname = "fib_multipath_use_neigh", diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 30d81f5..fe9da4f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -84,7 +84,6 @@ 
#include #include -int sysctl_tcp_tw_reuse __read_mostly; int sysctl_tcp_low_latency __read_mostly; #ifdef CONFIG_TCP_MD5SIG @@ -120,7 +119,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) and use initial timestamp retrieved from peer table. */ if (tcptw->tw_ts_recent_stamp && - (!twp || (sysctl_tcp_tw_reuse && + (!twp || (sock_net(sk)->ipv4.sysctl_tcp_tw_reuse && get_seconds() - tcptw->tw_ts_recent_stamp > 1))) { tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; if (tp->write_seq == 0) @@ -2456,6 +2455,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_orphan_retries = 0; net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX; + net->ipv4.sysctl_tcp_tw_reuse = 0; return 0; fail: -- 1.8.3.1
[PATCH v2] ipv4: Namespaceify tcp_tw_reuse knob
Different namespaces might have different requirements to reuse TIME-WAIT sockets for new connections. This might be required in cases where different namespace applications are in place which require TIME_WAIT socket connections to be reduced independently of the host. Signed-off-by: Haishuang Yan --- Changes in v2: - Make the commit message more clearer. --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++--- net/ipv4/tcp_ipv4.c| 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index f0cf5a1..0378e88 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -110,6 +110,7 @@ struct netns_ipv4 { int sysctl_tcp_orphan_retries; int sysctl_tcp_fin_timeout; unsigned int sysctl_tcp_notsent_lowat; + int sysctl_tcp_tw_reuse; int sysctl_igmp_max_memberships; int sysctl_igmp_max_msf; diff --git a/include/net/tcp.h b/include/net/tcp.h index 207147b..6061963 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -252,7 +252,6 @@ extern int sysctl_tcp_rmem[3]; extern int sysctl_tcp_app_win; extern int sysctl_tcp_adv_win_scale; -extern int sysctl_tcp_tw_reuse; extern int sysctl_tcp_frto; extern int sysctl_tcp_low_latency; extern int sysctl_tcp_nometrics_save; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 80bc36b..22cbd61 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -433,13 +433,6 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write, .extra2 = &tcp_adv_win_scale_max, }, { - .procname = "tcp_tw_reuse", - .data = &sysctl_tcp_tw_reuse, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "tcp_frto", .data = &sysctl_tcp_frto, .maxlen = sizeof(int), @@ -960,6 +953,13 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write, .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "tcp_tw_reuse", + .data = 
&init_net.ipv4.sysctl_tcp_tw_reuse, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, #ifdef CONFIG_IP_ROUTE_MULTIPATH { .procname = "fib_multipath_use_neigh", diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 30d81f5..fe9da4f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -84,7 +84,6 @@ #include #include -int sysctl_tcp_tw_reuse __read_mostly; int sysctl_tcp_low_latency __read_mostly; #ifdef CONFIG_TCP_MD5SIG @@ -120,7 +119,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) and use initial timestamp retrieved from peer table. */ if (tcptw->tw_ts_recent_stamp && - (!twp || (sysctl_tcp_tw_reuse && + (!twp || (sock_net(sk)->ipv4.sysctl_tcp_tw_reuse && get_seconds() - tcptw->tw_ts_recent_stamp > 1))) { tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; if (tp->write_seq == 0) @@ -2456,6 +2455,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_orphan_retries = 0; net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX; + net->ipv4.sysctl_tcp_tw_reuse = 0; return 0; fail: -- 1.8.3.1
[PATCH 2/2] ipv4: Namespaceify tcp_max_syn_backlog knob
Different namespace application might require different maximal number of remembered connection requests. Signed-off-by: Haishuang Yan --- include/net/netns/ipv4.h | 1 + include/net/request_sock.h | 4 +--- net/core/request_sock.c| 2 -- net/ipv4/sysctl_net_ipv4.c | 14 +++--- net/ipv4/tcp.c | 2 -- net/ipv4/tcp_input.c | 4 ++-- net/ipv4/tcp_ipv4.c| 7 +-- 7 files changed, 16 insertions(+), 18 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 99becaf..96b15a2 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -122,6 +122,7 @@ struct netns_ipv4 { unsigned int sysctl_tcp_notsent_lowat; int sysctl_tcp_tw_reuse; struct inet_timewait_death_row tcp_death_row; + int sysctl_max_syn_backlog; int sysctl_igmp_max_memberships; int sysctl_igmp_max_msf; diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 6ebe13e..a12a5d2 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -1,7 +1,7 @@ /* * NET Generic infrastructure for Network protocols. * - * Definitions for request_sock + * Definitions for request_sock * * Authors:Arnaldo Carvalho de Melo * @@ -123,8 +123,6 @@ static inline void reqsk_put(struct request_sock *req) reqsk_free(req); } -extern int sysctl_max_syn_backlog; - /* * For a TCP Fast Open listener - * lock - protects the access to all the reqsk, which is co-owned by diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 5d26056..9b8727c 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -34,8 +34,6 @@ * and it will increase in proportion to the memory of machine. * Note : Dont forget somaxconn that may limit backlog too. 
*/ -int sysctl_max_syn_backlog = 256; -EXPORT_SYMBOL(sysctl_max_syn_backlog); void reqsk_queue_alloc(struct request_sock_queue *queue) { diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 66f8f1b..134d8e1 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -324,13 +324,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .procname = "tcp_max_syn_backlog", - .data = &sysctl_max_syn_backlog, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "inet_peer_threshold", .data = &inet_peer_threshold, .maxlen = sizeof(int), @@ -960,6 +953,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_max_syn_backlog", + .data = &init_net.ipv4.sysctl_max_syn_backlog, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, #ifdef CONFIG_IP_ROUTE_MULTIPATH { .procname = "fib_multipath_use_neigh", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 91938c9..f0637a9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3378,9 +3378,7 @@ void __init tcp_init(void) cnt = tcp_hashinfo.ehash_mask + 1; - sysctl_tcp_max_orphans = cnt / 2; - sysctl_max_syn_backlog = max(128, cnt / 256); tcp_init_mem(); /* Set per-socket limits to no more than 1/128 the pressure threshold */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c614802..ec6d843 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6377,8 +6377,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, } /* Kill the following clause, if you dislike this way. 
*/ else if (!net->ipv4.sysctl_tcp_syncookies && -(sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < - (sysctl_max_syn_backlog >> 2)) && +(net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < + (net->ipv4.sysctl_max_syn_backlog >> 2)) && !tcp_peer_is_proven(req, dst, false, tmp_opt.saw_tstamp)) { /* Without syncookies last quarter of diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 56b5f49..7e4be4f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2419,7 +2419,7 @@ static void __net_exit tcp_sk_exit(struct net *net) static int __net_init tcp_sk_init(struct net *net) { - int res, cpu; + int res, cpu, cn
[PATCH 1/2] ipv4: Namespaceify tcp_tw_recycle and tcp_max_tw_buckets knob
Different namespace application might require fast recycling TIME-WAIT sockets independently of the host. Signed-off-by: Haishuang Yan --- include/net/inet_timewait_sock.h | 13 + include/net/netns/ipv4.h | 11 +++ include/net/tcp.h| 1 - net/ipv4/af_inet.c | 2 -- net/ipv4/inet_timewait_sock.c| 3 +-- net/ipv4/proc.c | 2 +- net/ipv4/sysctl_net_ipv4.c | 28 ++-- net/ipv4/tcp.c | 3 ++- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_ipv4.c | 12 net/ipv4/tcp_minisocks.c | 14 +- net/ipv6/tcp_ipv6.c | 7 --- 12 files changed, 48 insertions(+), 50 deletions(-) diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index c9b3eb7..6a75d67 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -29,16 +29,6 @@ #include -struct inet_hashinfo; - -struct inet_timewait_death_row { - atomic_ttw_count; - - struct inet_hashinfo*hashinfo cacheline_aligned_in_smp; - int sysctl_tw_recycle; - int sysctl_max_tw_buckets; -}; - struct inet_bind_bucket; /* @@ -125,8 +115,7 @@ static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo void inet_twsk_deschedule_put(struct inet_timewait_sock *tw); -void inet_twsk_purge(struct inet_hashinfo *hashinfo, -struct inet_timewait_death_row *twdr, int family); +void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family); static inline struct net *twsk_net(const struct inet_timewait_sock *twsk) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 0378e88..99becaf 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -27,6 +27,16 @@ struct ping_group_range { kgid_t range[2]; }; +struct inet_hashinfo; + +struct inet_timewait_death_row { + atomic_ttw_count; + + struct inet_hashinfo*hashinfo cacheline_aligned_in_smp; + int sysctl_tw_recycle; + int sysctl_max_tw_buckets; +}; + struct netns_ipv4 { #ifdef CONFIG_SYSCTL struct ctl_table_header *forw_hdr; @@ -111,6 +121,7 @@ struct netns_ipv4 { int sysctl_tcp_fin_timeout; unsigned int 
sysctl_tcp_notsent_lowat; int sysctl_tcp_tw_reuse; + struct inet_timewait_death_row tcp_death_row; int sysctl_igmp_max_memberships; int sysctl_igmp_max_msf; diff --git a/include/net/tcp.h b/include/net/tcp.h index 6061963..1da0aa7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -231,7 +231,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); */ #defineTFO_SERVER_WO_SOCKOPT1 0x400 -extern struct inet_timewait_death_row tcp_death_row; /* sysctl variables for tcp */ extern int sysctl_tcp_timestamps; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 1830e6f..29b1dd9 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1831,8 +1831,6 @@ static int __init inet_init(void) ip_init(); - tcp_v4_init(); - /* Setup TCP slab cache for open requests. */ tcp_init(); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index ddcd56c..f8aff2c 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -257,8 +257,7 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm) } EXPORT_SYMBOL_GPL(__inet_twsk_schedule); -void inet_twsk_purge(struct inet_hashinfo *hashinfo, -struct inet_timewait_death_row *twdr, int family) +void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family) { struct inet_timewait_sock *tw; struct sock *sk; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 7143ca1..0247ca0 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -65,7 +65,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) socket_seq_show(seq); seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n", sock_prot_inuse_get(net, &tcp_prot), orphans, - atomic_read(&tcp_death_row.tw_count), sockets, + atomic_read(&net->ipv4.tcp_death_row.tw_count), sockets, proto_memory_allocated(&tcp_prot)); seq_printf(seq, "UDP: inuse %d mem %ld\n", sock_prot_inuse_get(net, &udp_prot), diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 22cbd61..66f8f1b 100644 
--- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -290,13 +290,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .procname =
[PATCH v2] vxlan: fix a potential issue when creating a new vxlan fdb entry.
vxlan_fdb_append may return error, so add the proper check, otherwise it will cause memory leak. Signed-off-by: Haishuang Yan Changes in v2: - Unnecessary to initialize rc to zero. --- drivers/net/vxlan.c | 10 +++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 21e92be..bb70dd5 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -611,6 +611,7 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan, struct vxlan_rdst *rd = NULL; struct vxlan_fdb *f; int notify = 0; + int rc; f = __vxlan_find_mac(vxlan, mac); if (f) { @@ -641,8 +642,7 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan, if ((flags & NLM_F_APPEND) && (is_multicast_ether_addr(f->eth_addr) || is_zero_ether_addr(f->eth_addr))) { - int rc = vxlan_fdb_append(f, ip, port, vni, ifindex, - &rd); + rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd); if (rc < 0) return rc; @@ -673,7 +673,11 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan, INIT_LIST_HEAD(&f->remotes); memcpy(f->eth_addr, mac, ETH_ALEN); - vxlan_fdb_append(f, ip, port, vni, ifindex, &rd); + rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd); + if (rc < 0) { + kfree(f); + return rc; + } ++vxlan->addrcnt; hlist_add_head_rcu(&f->hlist, -- 1.8.3.1
[PATCH] openvswitch: add sanity check in queue_userspace_packet.
The kernel will oops if genlmsg_put returns NULL, so add a sanity check. Signed-off-by: Haishuang Yan --- net/openvswitch/datapath.c | 4 1 file changed, 4 insertions(+) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 2d4c4d3..ceb1b1e 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -474,6 +474,10 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, 0, upcall_info->cmd); + if (!upcall) { + err = -EMSGSIZE; + goto out; + } upcall->dp_ifindex = dp_ifindex; err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb); -- 1.8.3.1
[PATCH] geneve: fix max_mtu setting
For ipv6+udp+geneve encapsulation data, the max_mtu should subtract sizeof(ipv6hdr), instead of sizeof(iphdr). Signed-off-by: Haishuang Yan --- drivers/net/geneve.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index aa61708..c676d23 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1036,12 +1036,17 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) static int __geneve_change_mtu(struct net_device *dev, int new_mtu, bool strict) { + struct geneve_dev *geneve = netdev_priv(dev); /* The max_mtu calculation does not take account of GENEVE * options, to avoid excluding potentially valid * configurations. */ - int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - sizeof(struct iphdr) - - dev->hard_header_len; + int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len; + + if (geneve->remote.sa.sa_family == AF_INET) + max_mtu -= sizeof(struct iphdr); + else + max_mtu -= sizeof(struct ipv6hdr); if (new_mtu < 68) return -EINVAL; -- 1.8.3.1
[PATCH] openvswitch: Use proper buffer size in nla_memcpy
For the count parameter, it is better to pass the size of the destination buffer, as nla_memcpy already takes the length of the source netlink attribute into account when copying data from it. Signed-off-by: Haishuang Yan --- net/openvswitch/conntrack.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index dc5eb29..f8a8d43 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -968,7 +968,8 @@ static int parse_nat(const struct nlattr *attr, break; case OVS_NAT_ATTR_IP_MIN: - nla_memcpy(&info->range.min_addr, a, nla_len(a)); + nla_memcpy(&info->range.min_addr, a, + sizeof(info->range.min_addr)); info->range.flags |= NF_NAT_RANGE_MAP_IPS; break; -- 1.8.3.1
[PATCH] bridge: Allow set bridge ageing time when switchdev disabled
When NET_SWITCHDEV=n, switchdev_port_attr_set will return -EOPNOTSUPP, we should ignore this error code and continue to set the ageing time. Signed-off-by: Haishuang Yan --- net/bridge/br_stp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index e234490..9cb7044 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -582,7 +582,7 @@ int br_set_ageing_time(struct net_bridge *br, u32 ageing_time) int err; err = switchdev_port_attr_set(br->dev, &attr); - if (err) + if (err && err != -EOPNOTSUPP) return err; br->ageing_time = t; -- 1.8.3.1
[PATCH] gre: fix return value of gre_rcv
Dropped skb's should be documented by an appropriate return value. Use the correct NET_RX_DROP and NET_RX_SUCCESS values for that reason. Signed-off-by: Haishuang Yan --- net/ipv4/ip_gre.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 31936d3..1dc0cdb 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -432,12 +432,12 @@ static int gre_rcv(struct sk_buff *skb) goto drop; if (ipgre_rcv(skb, &tpi) == PACKET_RCVD) - return 0; + return NET_RX_SUCCESS; icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); drop: kfree_skb(skb); - return 0; + return NET_RX_DROP; } static __sum16 gre_checksum(struct sk_buff *skb) -- 1.8.3.1
[PATCH] vlan: propagate gso_min_segs
vlan drivers lack proper propagation of gso_min_segs from lower device. Signed-off-by: Haishuang Yan --- drivers/net/ipvlan/ipvlan_main.c | 2 ++ drivers/net/macvlan.c| 1 + net/8021q/vlan.c | 1 + net/8021q/vlan_dev.c | 1 + 4 files changed, 5 insertions(+) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 57941d3..72a2517 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -120,6 +120,7 @@ static int ipvlan_init(struct net_device *dev) dev->features |= NETIF_F_LLTX; dev->gso_max_size = phy_dev->gso_max_size; dev->gso_max_segs = phy_dev->gso_max_segs; + dev->gso_min_segs = phy_dev->gso_min_segs; dev->hard_header_len = phy_dev->hard_header_len; ipvlan_set_lockdep_class(dev); @@ -594,6 +595,7 @@ static int ipvlan_device_event(struct notifier_block *unused, ipvlan->dev->features = dev->features & IPVLAN_FEATURES; ipvlan->dev->gso_max_size = dev->gso_max_size; ipvlan->dev->gso_max_segs = dev->gso_max_segs; + ipvlan->dev->gso_min_segs = dev->gso_min_segs; netdev_features_change(ipvlan->dev); } break; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 2bcf1f3..72991e9 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1534,6 +1534,7 @@ static int macvlan_device_event(struct notifier_block *unused, list_for_each_entry(vlan, &port->vlans, list) { vlan->dev->gso_max_size = dev->gso_max_size; vlan->dev->gso_max_segs = dev->gso_max_segs; + vlan->dev->gso_min_segs = dev->gso_min_segs; netdev_update_features(vlan->dev); } break; diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index a1e273a..01a4de1 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -312,6 +312,7 @@ static void vlan_transfer_features(struct net_device *dev, vlandev->gso_max_size = dev->gso_max_size; vlandev->gso_max_segs = dev->gso_max_segs; + vlandev->gso_min_segs = dev->gso_min_segs; if (vlan_hw_offload_capable(dev->features, vlan->vlan_proto)) vlandev->hard_header_len = dev->hard_header_len; diff --git 
a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index e7e6257..752263d 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -552,6 +552,7 @@ static int vlan_dev_init(struct net_device *dev) NETIF_F_GSO_SOFTWARE; dev->gso_max_size = real_dev->gso_max_size; dev->gso_max_segs = real_dev->gso_max_segs; + dev->gso_min_segs = real_dev->gso_min_segs; if (dev->features & NETIF_F_VLAN_FEATURES) netdev_warn(real_dev, "VLAN features are set incorrectly. Q-in-Q configurations may not work correctly.\n"); -- 1.8.3.1
[PATCH] net: ping: make ping_v6_sendmsg static
As ping_v6_sendmsg is used only in this file, making it static The body of "pingv6_prot" and "pingv6_protosw" were moved at the middle of the file, to avoid having to declare some static prototypes. Signed-off-by: Haishuang Yan --- include/net/ping.h | 1 - net/ipv6/ping.c| 59 +++--- 2 files changed, 29 insertions(+), 31 deletions(-) diff --git a/include/net/ping.h b/include/net/ping.h index 5fd7cc2..4cd90d6 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -79,7 +79,6 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len); int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, void *user_icmph, size_t icmph_len); -int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); bool ping_rcv(struct sk_buff *skb); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 263a516..c382db7 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -26,35 +26,6 @@ #include #include -struct proto pingv6_prot = { - .name = "PINGv6", - .owner =THIS_MODULE, - .init = ping_init_sock, - .close =ping_close, - .connect = ip6_datagram_connect_v6_only, - .disconnect = udp_disconnect, - .setsockopt = ipv6_setsockopt, - .getsockopt = ipv6_getsockopt, - .sendmsg = ping_v6_sendmsg, - .recvmsg = ping_recvmsg, - .bind = ping_bind, - .backlog_rcv = ping_queue_rcv_skb, - .hash = ping_hash, - .unhash = ping_unhash, - .get_port = ping_get_port, - .obj_size = sizeof(struct raw6_sock), -}; -EXPORT_SYMBOL_GPL(pingv6_prot); - -static struct inet_protosw pingv6_protosw = { - .type = SOCK_DGRAM, - .protocol = IPPROTO_ICMPV6, - .prot = &pingv6_prot, - .ops = &inet6_dgram_ops, - .flags = INET_PROTOSW_REUSE, -}; - - /* Compatibility glue so we can support IPv6 when it's compiled as a module */ static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) @@ -77,7 +48,7 @@ static int dummy_ipv6_chk_addr(struct net *net, const struct 
in6_addr *addr, return 0; } -int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) +static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -192,6 +163,34 @@ int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return len; } +struct proto pingv6_prot = { + .name = "PINGv6", + .owner =THIS_MODULE, + .init = ping_init_sock, + .close =ping_close, + .connect = ip6_datagram_connect_v6_only, + .disconnect = udp_disconnect, + .setsockopt = ipv6_setsockopt, + .getsockopt = ipv6_getsockopt, + .sendmsg = ping_v6_sendmsg, + .recvmsg = ping_recvmsg, + .bind = ping_bind, + .backlog_rcv = ping_queue_rcv_skb, + .hash = ping_hash, + .unhash = ping_unhash, + .get_port = ping_get_port, + .obj_size = sizeof(struct raw6_sock), +}; +EXPORT_SYMBOL_GPL(pingv6_prot); + +static struct inet_protosw pingv6_protosw = { + .type = SOCK_DGRAM, + .protocol = IPPROTO_ICMPV6, + .prot = &pingv6_prot, + .ops = &inet6_dgram_ops, + .flags = INET_PROTOSW_REUSE, +}; + #ifdef CONFIG_PROC_FS static void *ping_v6_seq_start(struct seq_file *seq, loff_t *pos) { -- 1.8.3.1
Re: [PATCH v2,net-next] ip6_gre: fix a potential issue in ip6erspan_rcv
> On 2017年12月19日, at 下午11:34, David Miller wrote: > > From: Haishuang Yan > Date: Sat, 16 Dec 2017 10:25:25 +0800 > >> pskb_may_pull() can change skb->data, so we need to load ipv6h/ershdr at >> the right place. >> >> Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support") >> Acked-by: William Tu >> Cc: William Tu >> Signed-off-by: Haishuang Yan > > This patch does not apply: > >> +ipv6h = ipv6_hdr(skb); >> +ershdr = (struct erspan_base_hdr *)skb->data; >> ver = (ntohs(ershdr->ver_vlan) & VER_MASK) >> VER_OFFSET; >> tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK); >> pkt_md = (struct erspan_metadata *)(ershdr + 1); > > There is not "pkt_md = ..." assignment in net-next on this line. > Okay, I will fix it and resubmit another commit, thanks.
Re: [PATCH v2,net-next 1/2] ip_gre: fix potential memory leak in erspan_rcv
> On 2017年12月19日, at 下午11:36, David Miller wrote: > > From: Haishuang Yan > Date: Sat, 16 Dec 2017 10:48:38 +0800 > >> If md is NULL, tun_dst must be freed, otherwise it will cause memory >> leak. >> >> Fixes: 1a66a836da6 ("gre: add collect_md mode to ERSPAN tunnel") >> Cc: William Tu >> Signed-off-by: Haishuang Yan >> >> Change since v2: >> * Rebase on latest master branch. >> * Correct wrong fix information. > > Please do not put a changelog after the fixes and signoff tags, those tags > must > appear last in the commit message. > > Thank you. > Okay, I will resubmit another commit, thanks.
[PATCH v3,net-next] ip6_gre: fix a potential issue in ip6erspan_rcv
pskb_may_pull() can change skb->data, so we need to load ipv6h/ershdr at the right place. Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support") Cc: William Tu Acked-by: William Tu Signed-off-by: Haishuang Yan --- Change since v3: * Rebase on latest master branch. * Fix wrong commit information. --- net/ipv6/ip6_gre.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 87b9892..9bd1103 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -507,12 +507,11 @@ static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len, struct ip6_tnl *tunnel; u8 ver; - ipv6h = ipv6_hdr(skb); - ershdr = (struct erspan_base_hdr *)skb->data; - if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr)))) return PACKET_REJECT; + ipv6h = ipv6_hdr(skb); + ershdr = (struct erspan_base_hdr *)skb->data; ver = (ntohs(ershdr->ver_vlan) & VER_MASK) >> VER_OFFSET; tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK); -- 1.8.3.1
[PATCH v3,net-next 1/2] ip_gre: fix potential memory leak in erspan_rcv
If md is NULL, tun_dst must be freed, otherwise it will cause memory leak. Fixes: 1a66a836da6 ("gre: add collect_md mode to ERSPAN tunnel") Cc: William Tu Signed-off-by: Haishuang Yan --- Changes since v3: * Rebase on latest master branch. * Fix wrong commit information. --- net/ipv4/ip_gre.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index fd4d6e9..3029e3e 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -313,8 +313,10 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, return PACKET_REJECT; md = ip_tunnel_info_opts(&tun_dst->u.tun_info); - if (!md) + if (!md) { + dst_release((struct dst_entry *)tun_dst); return PACKET_REJECT; + } memcpy(md, pkt_md, sizeof(*md)); md->version = ver; -- 1.8.3.1
[PATCH v3,net-next 0/2] net: erspan: fix potential memory leak
This patch series fixes a potential memory leak issue. Haishuang Yan (2): ip_gre: fix potential memory leak in erspan_rcv ip6_gre: fix potential memory leak in ip6erspan_rcv net/ipv4/ip_gre.c | 4 +++- net/ipv6/ip6_gre.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) -- 1.8.3.1
[PATCH v3,net-next 2/2] ip6_gre: fix potential memory leak in ip6erspan_rcv
If md is NULL, tun_dst must be freed, otherwise it will cause memory leak. Fixes: ef7baf5e083c ("ip6_gre: add ip6 erspan collect_md mode") Cc: William Tu Signed-off-by: Haishuang Yan --- Changes since v3: * Rebase on latest master branch. * Fix wrong commit information. --- net/ipv6/ip6_gre.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 9bd1103..45038a9 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -550,8 +550,10 @@ static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len, info = &tun_dst->u.tun_info; md = ip_tunnel_info_opts(info); - if (!md) + if (!md) { + dst_release((struct dst_entry *)tun_dst); return PACKET_REJECT; + } memcpy(md, pkt_md, sizeof(*md)); md->version = ver; -- 1.8.3.1
[PATCH v3,net-next 0/2] net: erspan: fix erspan_rcv/ip6erspan_rcv error path
This patch series fixes a potential issue in the error path. Haishuang Yan (2): ip_gre: fix error path when erspan_rcv failed ip6_gre: fix error path when ip6erspan_rcv failed net/ipv4/ip_gre.c | 2 ++ net/ipv6/ip6_gre.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) -- 1.8.3.1
[PATCH v3,net-next 1/2] ip_gre: fix error path when erspan_rcv failed
When erspan_rcv returns PACKET_REJECT, we shouldn't call ipgre_rcv to process the packet again; instead, send an ICMP unreachable message in the error path. Fixes: 84e54fe0a5ea ("gre: introduce native tunnel support for ERSPAN") Acked-by: William Tu Cc: William Tu Signed-off-by: Haishuang Yan --- Change since v3: * Rebase on latest master branch. * Fix wrong commit information. --- net/ipv4/ip_gre.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 3029e3e..90c9123 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -436,11 +436,13 @@ static int gre_rcv(struct sk_buff *skb) tpi.proto == htons(ETH_P_ERSPAN2))) { if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) return 0; + goto out; } if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) return 0; +out: icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); drop: kfree_skb(skb); -- 1.8.3.1
[PATCH v3,net-next 2/2] ip6_gre: fix error path when ip6erspan_rcv failed
Same as ipv4 code, when ip6erspan_rcv call return PACKET_REJECT, we should call icmpv6_send to send icmp unreachable message in error path. Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support") Acked-by: William Tu Cc: William Tu Signed-off-by: Haishuang Yan --- Change since v2: * Rebase on latest master branch. * Fix wrong commit information. --- net/ipv6/ip6_gre.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 45038a9..8451d00 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -604,12 +604,13 @@ static int gre_rcv(struct sk_buff *skb) tpi.proto == htons(ETH_P_ERSPAN2))) { if (ip6erspan_rcv(skb, hdr_len, &tpi) == PACKET_RCVD) return 0; - goto drop; + goto out; } if (ip6gre_rcv(skb, &tpi) == PACKET_RCVD) return 0; +out: icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); drop: kfree_skb(skb); -- 1.8.3.1
[PATCH 1/2] ip_gre: fix potential memory leak in erspan_rcv
If md is NULL, tun_dst must be freed, otherwise it will cause memory leak. Fixes: 84e54fe0a5ea ("gre: introduce native tunnel support for ERSPAN") Cc: William Tu Signed-off-by: Haishuang Yan --- net/ipv4/ip_gre.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d828821..9253d6f 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -304,8 +304,10 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, return PACKET_REJECT; md = ip_tunnel_info_opts(&tun_dst->u.tun_info); - if (!md) + if (!md) { + dst_release((struct dst_entry *)tun_dst); return PACKET_REJECT; + } md->index = index; info = &tun_dst->u.tun_info; -- 1.8.3.1
[PATCH 2/2] ip6_gre: fix potential memory leak in ip6erspan_rcv
If md is NULL, tun_dst must be freed, otherwise it will cause memory leak Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support") Cc: William Tu Signed-off-by: Haishuang Yan --- net/ipv6/ip6_gre.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4562579..b8b0e4b 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -542,8 +542,10 @@ static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len, info = &tun_dst->u.tun_info; md = ip_tunnel_info_opts(info); - if (!md) + if (!md) { + dst_release((struct dst_entry *)tun_dst); return PACKET_REJECT; + } md->index = index; info->key.tun_flags |= TUNNEL_ERSPAN_OPT; -- 1.8.3.1
[PATCH 1/2] ip_gre: fix error path when erspan_rcv failed
When erspan_rcv returns PACKET_REJECT, we shouldn't call ipgre_rcv to process the packet again; instead, send an ICMP unreachable message in the error path. Fixes: 84e54fe0a5ea ("gre: introduce native tunnel support for ERSPAN") Cc: William Tu Signed-off-by: Haishuang Yan --- net/ipv4/ip_gre.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 9253d6f..61ee014 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -411,11 +411,13 @@ static int gre_rcv(struct sk_buff *skb) if (unlikely(tpi.proto == htons(ETH_P_ERSPAN))) { if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) return 0; + goto out; } if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) return 0; +out: icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); drop: kfree_skb(skb); -- 1.8.3.1
[PATCH 2/2] ip6_gre: fix error path when ip6erspan_rcv failed
Same as ipv4 code, when ip6erspan_rcv call return PACKET_REJECT, we should call icmpv6_send to send icmp unreachable message in error path. Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support") Cc: William Tu Signed-off-by: Haishuang Yan --- net/ipv6/ip6_gre.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index b8b0e4b..68e7eef 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -580,12 +580,13 @@ static int gre_rcv(struct sk_buff *skb) if (unlikely(tpi.proto == htons(ETH_P_ERSPAN))) { if (ip6erspan_rcv(skb, hdr_len, &tpi) == PACKET_RCVD) return 0; - goto drop; + goto out; } if (ip6gre_rcv(skb, &tpi) == PACKET_RCVD) return 0; +out: icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); drop: kfree_skb(skb); -- 1.8.3.1
[PATCH] ip_gre: fix wrong return value of erspan_rcv
If pskb_may_pull() fails, return PACKET_REJECT instead of -ENOMEM. Fixes: 84e54fe0a5ea ("gre: introduce native tunnel support for ERSPAN") Cc: William Tu Signed-off-by: Haishuang Yan --- net/ipv4/ip_gre.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 61ee014..d747d06 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -267,7 +267,7 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, len = gre_hdr_len + sizeof(*ershdr); if (unlikely(!pskb_may_pull(skb, len))) - return -ENOMEM; + return PACKET_REJECT; iph = ip_hdr(skb); ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len); -- 1.8.3.1
[PATCH] ip6_gre: fix a potential issue in ip6erspan_rcv
pskb_may_pull() can change skb->data, so we need to load ipv6h/ershdr at the right place. Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support") Cc: William Tu Signed-off-by: Haishuang Yan --- net/ipv6/ip6_gre.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 68e7eef..eab4b56 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -506,12 +506,12 @@ static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len, struct ip6_tnl *tunnel; __be32 index; - ipv6h = ipv6_hdr(skb); - ershdr = (struct erspanhdr *)skb->data; - if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr)))) return PACKET_REJECT; + ipv6h = ipv6_hdr(skb); + ershdr = (struct erspanhdr *)skb->data; + tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK); index = ershdr->md.index; -- 1.8.3.1
[PATCH v2,net-next] ip6_gre: fix a potential issue in ip6erspan_rcv
pskb_may_pull() can change skb->data, so we need to load ipv6h/ershdr at the right place. Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support") Acked-by: William Tu Cc: William Tu Signed-off-by: Haishuang Yan --- Change since v2: * Rebase on latest master. --- net/ipv6/ip6_gre.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index f210f9c..aa1512e 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -507,12 +507,11 @@ static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len, struct ip6_tnl *tunnel; u8 ver; - ipv6h = ipv6_hdr(skb); - ershdr = (struct erspan_base_hdr *)skb->data; - if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr)))) return PACKET_REJECT; + ipv6h = ipv6_hdr(skb); + ershdr = (struct erspan_base_hdr *)skb->data; ver = (ntohs(ershdr->ver_vlan) & VER_MASK) >> VER_OFFSET; tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK); pkt_md = (struct erspan_metadata *)(ershdr + 1); -- 1.8.3.1
[PATCH v2,net-next 1/2] ip_gre: fix potential memory leak in erspan_rcv
If md is NULL, tun_dst must be freed, otherwise it will cause memory leak. Fixes: 1a66a836da6 ("gre: add collect_md mode to ERSPAN tunnel") Cc: William Tu Signed-off-by: Haishuang Yan Change since v2: * Rebase on latest master branch. * Correct wrong fix information. --- net/ipv4/ip_gre.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 004800b..33af55a 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -311,8 +311,10 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, return PACKET_REJECT; md = ip_tunnel_info_opts(&tun_dst->u.tun_info); - if (!md) + if (!md) { + dst_release((struct dst_entry *)tun_dst); return PACKET_REJECT; + } memcpy(md, pkt_md, sizeof(*md)); md->version = ver; -- 1.8.3.1
[PATCH v2,net-next 2/2] ip6_gre: fix potential memory leak in ip6erspan_rcv
If md is NULL, tun_dst must be freed, otherwise it will cause memory leak. Fixes: ef7baf5e083c ("ip6_gre: add ip6 erspan collect_md mode") Cc: William Tu Signed-off-by: Haishuang Yan Change since v2: * Rebase on latest master branch. * Correct wrong fix information. --- net/ipv6/ip6_gre.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 5c9c65f..8ce9d42 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -549,8 +549,10 @@ static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len, info = &tun_dst->u.tun_info; md = ip_tunnel_info_opts(info); - if (!md) + if (!md) { + dst_release((struct dst_entry *)tun_dst); return PACKET_REJECT; + } memcpy(md, pkt_md, sizeof(*md)); md->version = ver; -- 1.8.3.1
[PATCH v2,net-next 2/2] ip6_gre: fix error path when ip6erspan_rcv failed
Same as ipv4 code, when ip6erspan_rcv call return PACKET_REJECT, we should call icmpv6_send to send icmp unreachable message in error path. Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support") Acked-by: William Tu Cc: William Tu Signed-off-by: Haishuang Yan Change since v2: * Rebase on latest master branch. --- net/ipv6/ip6_gre.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 8ce9d42..f210f9c 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -603,12 +603,13 @@ static int gre_rcv(struct sk_buff *skb) tpi.proto == htons(ETH_P_ERSPAN2))) { if (ip6erspan_rcv(skb, hdr_len, &tpi) == PACKET_RCVD) return 0; - goto drop; + goto out; } if (ip6gre_rcv(skb, &tpi) == PACKET_RCVD) return 0; +out: icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); drop: kfree_skb(skb); -- 1.8.3.1
[PATCH v2,net-next 1/2] ip_gre: fix error path when erspan_rcv failed
When erspan_rcv returns PACKET_REJECT, we shouldn't call ipgre_rcv to process the packet again; instead, send an ICMP unreachable message in the error path. Fixes: 84e54fe0a5ea ("gre: introduce native tunnel support for ERSPAN") Acked-by: William Tu Cc: William Tu Signed-off-by: Haishuang Yan Change since v2: * Rebase on latest master branch. --- net/ipv4/ip_gre.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 33af55a..ccfc5bc 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -434,11 +434,13 @@ static int gre_rcv(struct sk_buff *skb) tpi.proto == htons(ETH_P_ERSPAN2))) { if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) return 0; + goto out; } if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) return 0; +out: icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); drop: kfree_skb(skb); -- 1.8.3.1
[PATCH net-next 2/2] geneve: speedup geneve tunnels dismantle
Since we now hold the RTNL lock in geneve_exit_net, it's better to batch them to speed up geneve tunnel dismantle. Signed-off-by: Haishuang Yan --- drivers/net/geneve.c | 24 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index b718a02..667c44f 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1638,19 +1638,16 @@ static __net_init int geneve_init_net(struct net *net) return 0; } -static void __net_exit geneve_exit_net(struct net *net) +static void geneve_destroy_tunnels(struct net *net, struct list_head *head) { struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_dev *geneve, *next; struct net_device *dev, *aux; - LIST_HEAD(list); - - rtnl_lock(); /* gather any geneve devices that were moved into this ns */ for_each_netdev_safe(net, dev, aux) if (dev->rtnl_link_ops == &geneve_link_ops) - unregister_netdevice_queue(dev, &list); + unregister_netdevice_queue(dev, head); /* now gather any other geneve devices that were created in this ns */ list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) { @@ -1658,18 +1655,29 @@ static void __net_exit geneve_exit_net(struct net *net) * to the list by the previous loop. */ if (!net_eq(dev_net(geneve->dev), net)) - unregister_netdevice_queue(geneve->dev, &list); + unregister_netdevice_queue(geneve->dev, head); } + WARN_ON_ONCE(!list_empty(&gn->sock_list)); +} + +static void __net_exit geneve_exit_batch_net(struct list_head *net_list) +{ + struct net *net; + LIST_HEAD(list); + + rtnl_lock(); + list_for_each_entry(net, net_list, exit_list) + geneve_destroy_tunnels(net, &list); + /* unregister the devices gathered above */ unregister_netdevice_many(&list); rtnl_unlock(); - WARN_ON_ONCE(!list_empty(&gn->sock_list)); } static struct pernet_operations geneve_net_ops = { .init = geneve_init_net, - .exit = geneve_exit_net, + .exit_batch = geneve_exit_batch_net, .id = &geneve_net_id, .size = sizeof(struct geneve_net), }; -- 1.8.3.1
[PATCH net-next 0/2] net: speedup geneve/vxlan tunnels dismantle
This patch series adds batching to vxlan/geneve tunnels so that netns dismantles are less costly. Haishuang Yan (2): vxlan: speedup vxlan tunnels dismantle geneve: speedup geneve tunnels dismantle drivers/net/geneve.c | 24 drivers/net/vxlan.c | 26 +- 2 files changed, 33 insertions(+), 17 deletions(-) -- 1.8.3.1
[PATCH net-next 1/2] vxlan: speedup vxlan tunnels dismantle
Since we now hold RTNL lock in vxlan_exit_net, it's better to batch them to speedup vxlan tunnels dismantle. Signed-off-by: Haishuang Yan --- drivers/net/vxlan.c | 26 +- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 19b9cc5..48a0dc2 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -3692,18 +3692,16 @@ static __net_init int vxlan_init_net(struct net *net) return 0; } -static void __net_exit vxlan_exit_net(struct net *net) +static void vxlan_destroy_tunnels(struct net *net, struct list_head *head) { struct vxlan_net *vn = net_generic(net, vxlan_net_id); struct vxlan_dev *vxlan, *next; struct net_device *dev, *aux; unsigned int h; - LIST_HEAD(list); - rtnl_lock(); for_each_netdev_safe(net, dev, aux) if (dev->rtnl_link_ops == &vxlan_link_ops) - unregister_netdevice_queue(dev, &list); + unregister_netdevice_queue(dev, head); list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next) { /* If vxlan->dev is in the same netns, it has already been added @@ -3711,20 +3709,30 @@ static void __net_exit vxlan_exit_net(struct net *net) */ if (!net_eq(dev_net(vxlan->dev), net)) { gro_cells_destroy(&vxlan->gro_cells); - unregister_netdevice_queue(vxlan->dev, &list); + unregister_netdevice_queue(vxlan->dev, head); } } - unregister_netdevice_many(&list); - rtnl_unlock(); - for (h = 0; h < PORT_HASH_SIZE; ++h) WARN_ON_ONCE(!hlist_empty(&vn->sock_list[h])); } +static void __net_exit vxlan_exit_batch_net(struct list_head *net_list) +{ + struct net *net; + LIST_HEAD(list); + + rtnl_lock(); + list_for_each_entry(net, net_list, exit_list) + vxlan_destroy_tunnels(net, &list); + + unregister_netdevice_many(&list); + rtnl_unlock(); +} + static struct pernet_operations vxlan_net_ops = { .init = vxlan_init_net, - .exit = vxlan_exit_net, + .exit_batch = vxlan_exit_batch_net, .id = &vxlan_net_id, .size = sizeof(struct vxlan_net), }; -- 1.8.3.1
[PATCH v4 1/2] ip_tunnel: fix ip tunnel lookup in collect_md mode
In collect_md mode, if the tun dev is down, it can still call ip_tunnel_rcv to receive packets, and the rx statistics increase improperly. When the md tunnel is down, it's not necessary to increase RX drops for the tunnel device; packets would be received on the fallback tunnel, and the RX drops on the fallback device will be increased as expected. Fixes: 2e15ea390e6f ("ip_gre: Add support to collect tunnel metadata.") Cc: Pravin B Shelar Signed-off-by: Haishuang Yan --- Change since v4: * Make the commit message clearer. * Fix wrong recipient address --- net/ipv4/ip_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index e1856bf..e9805ad 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -176,7 +176,7 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, return cand; t = rcu_dereference(itn->collect_md_tun); - if (t) + if (t && t->dev->flags & IFF_UP) return t; if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP) -- 1.8.3.1
[PATCH v4 2/2] ip6_tunnel: fix ip6 tunnel lookup in collect_md mode
In collect_md mode, if the tun dev is down, it can still call __ip6_tnl_rcv to receive packets, and the rx statistics increase improperly. When the md tunnel is down, it's not necessary to increase RX drops for the tunnel device; packets would be received on the fallback tunnel, and the RX drops on the fallback device will be increased as expected. Fixes: 8d79266bc48c ("ip6_tunnel: add collect_md mode to IPv6 tunnels") Cc: Alexei Starovoitov Signed-off-by: Haishuang Yan --- Change since v4: * Make the commit message clearer * Fix wrong recipient address --- net/ipv6/ip6_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 10a693a..ae73164 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -171,7 +171,7 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev) } t = rcu_dereference(ip6n->collect_md_tun); - if (t) + if (t && t->dev->flags & IFF_UP) return t; t = rcu_dereference(ip6n->tnls_wc[0]); -- 1.8.3.1
[PATCH] ipv4: Namespaceify tcp_fastopen knob
Different namespace application might require enable TCP Fast Open feature independently of the host. Reported-by: Luca BRUNO Signed-off-by: Haishuang Yan --- include/net/netns/ipv4.h | 2 ++ include/net/tcp.h | 1 - net/ipv4/af_inet.c | 7 --- net/ipv4/sysctl_net_ipv4.c | 42 +- net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_fastopen.c| 13 ++--- net/ipv4/tcp_ipv4.c| 2 ++ samples/bpf/test_ipip.sh | 2 ++ 8 files changed, 39 insertions(+), 34 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 305e031..ea0953b 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -128,6 +128,8 @@ struct netns_ipv4 { struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_max_orphans; + int sysctl_tcp_fastopen; + unsigned int sysctl_tcp_fastopen_blackhole_timeout; #ifdef CONFIG_NET_L3_MASTER_DEV int sysctl_udp_l3mdev_accept; diff --git a/include/net/tcp.h b/include/net/tcp.h index ac2d998..e4cc0dd 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -240,7 +240,6 @@ /* sysctl variables for tcp */ -extern int sysctl_tcp_fastopen; extern int sysctl_tcp_retrans_collapse; extern int sysctl_tcp_stdurg; extern int sysctl_tcp_rfc1337; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e31108e..309b849 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -195,7 +195,7 @@ int inet_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; unsigned char old_state; - int err; + int err, tcp_fastopen; lock_sock(sk); @@ -217,8 +217,9 @@ int inet_listen(struct socket *sock, int backlog) * because the socket was in TCP_LISTEN state previously but * was shutdown() rather than close(). 
*/ - if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) && - (sysctl_tcp_fastopen & TFO_SERVER_ENABLE) && + tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen; + if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) && + (tcp_fastopen & TFO_SERVER_ENABLE) && !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) { fastopen_queue_tune(sk, backlog); tcp_fastopen_init_key_once(true); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 4f26c8d3..30ebeb9 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -394,27 +394,6 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl, .proc_handler = proc_dointvec }, { - .procname = "tcp_fastopen", - .data = &sysctl_tcp_fastopen, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "tcp_fastopen_key", - .mode = 0600, - .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10), - .proc_handler = proc_tcp_fastopen_key, - }, - { - .procname = "tcp_fastopen_blackhole_timeout_sec", - .data = &sysctl_tcp_fastopen_blackhole_timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_tfo_blackhole_detect_timeout, - .extra1 = &zero, - }, - { .procname = "tcp_abort_on_overflow", .data = &sysctl_tcp_abort_on_overflow, .maxlen = sizeof(int), @@ -1085,6 +1064,27 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl, .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_fastopen", + .data = &init_net.ipv4.sysctl_tcp_fastopen, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "tcp_fastopen_key", + .mode = 0600, + .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10), + .proc_handler = proc_tcp_fastopen_key, + }, + { + .procname = "tcp_fastopen_blackhole_timeout_sec", + .data = &init_net.ipv4.sysctl_tcp_fastopen_blackhole_timeout, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_tfo_blackhole_detect_timeout, +
[PATCH v2] ipv4: Namespaceify tcp_fastopen knob
Different namespace application might require enable TCP Fast Open feature independently of the host. Reported-by: Luca BRUNO Signed-off-by: Haishuang Yan --- Change since v2: * Remove unrelated change by mistake --- include/net/netns/ipv4.h | 2 ++ include/net/tcp.h | 1 - net/ipv4/af_inet.c | 7 --- net/ipv4/sysctl_net_ipv4.c | 42 +- net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_fastopen.c| 13 ++--- net/ipv4/tcp_ipv4.c| 2 ++ 7 files changed, 37 insertions(+), 34 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 305e031..ea0953b 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -128,6 +128,8 @@ struct netns_ipv4 { struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_max_orphans; + int sysctl_tcp_fastopen; + unsigned int sysctl_tcp_fastopen_blackhole_timeout; #ifdef CONFIG_NET_L3_MASTER_DEV int sysctl_udp_l3mdev_accept; diff --git a/include/net/tcp.h b/include/net/tcp.h index ac2d998..e4cc0dd 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -240,7 +240,6 @@ /* sysctl variables for tcp */ -extern int sysctl_tcp_fastopen; extern int sysctl_tcp_retrans_collapse; extern int sysctl_tcp_stdurg; extern int sysctl_tcp_rfc1337; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e31108e..309b849 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -195,7 +195,7 @@ int inet_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; unsigned char old_state; - int err; + int err, tcp_fastopen; lock_sock(sk); @@ -217,8 +217,9 @@ int inet_listen(struct socket *sock, int backlog) * because the socket was in TCP_LISTEN state previously but * was shutdown() rather than close(). 
*/ - if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) && - (sysctl_tcp_fastopen & TFO_SERVER_ENABLE) && + tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen; + if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) && + (tcp_fastopen & TFO_SERVER_ENABLE) && !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) { fastopen_queue_tune(sk, backlog); tcp_fastopen_init_key_once(true); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 4f26c8d3..30ebeb9 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -394,27 +394,6 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl, .proc_handler = proc_dointvec }, { - .procname = "tcp_fastopen", - .data = &sysctl_tcp_fastopen, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "tcp_fastopen_key", - .mode = 0600, - .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10), - .proc_handler = proc_tcp_fastopen_key, - }, - { - .procname = "tcp_fastopen_blackhole_timeout_sec", - .data = &sysctl_tcp_fastopen_blackhole_timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_tfo_blackhole_detect_timeout, - .extra1 = &zero, - }, - { .procname = "tcp_abort_on_overflow", .data = &sysctl_tcp_abort_on_overflow, .maxlen = sizeof(int), @@ -1085,6 +1064,27 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl, .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_fastopen", + .data = &init_net.ipv4.sysctl_tcp_fastopen, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "tcp_fastopen_key", + .mode = 0600, + .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10), + .proc_handler = proc_tcp_fastopen_key, + }, + { + .procname = "tcp_fastopen_blackhole_timeout_sec", + .data = &init_net.ipv4.sysctl_tcp_fastopen_blackhole_timeout, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_
[PATCH] be2net: Fix some u16 fields appropriately
In be_tx_compl_process, frag_index declared as u32, so it's better to declare last_index as u32 also. CC: Ajit Khaparde Fixes: b0fd2eb28bd4 ("be2net: Declare some u16 fields as u32 to improve performance") Signed-off-by: Haishuang Yan --- drivers/net/ethernet/emulex/benet/be.h | 2 +- drivers/net/ethernet/emulex/benet/be_main.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 674cf9d..2ba4d61 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -255,7 +255,7 @@ struct be_tx_stats { /* Structure to hold some data of interest obtained from a TX CQE */ struct be_tx_compl_info { u8 status; /* Completion status */ - u16 end_index; /* Completed TXQ Index */ + u32 end_index; /* Completed TXQ Index */ }; struct be_tx_obj { diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 319eee3..3645344 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2606,7 +2606,7 @@ static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo) } static u16 be_tx_compl_process(struct be_adapter *adapter, - struct be_tx_obj *txo, u16 last_index) + struct be_tx_obj *txo, u32 last_index) { struct sk_buff **sent_skbs = txo->sent_skb_list; struct be_queue_info *txq = &txo->q; -- 1.8.3.1