Currently there is no rate limit on tunnel hardware address ARP/ND
lookups. Furthermote, there is no indication for how frequently action
translation composes these packets.
This patch implements a limit of generating one packet per destination
per second and adds a counter for each time a lookup happens.
Fixes: a36de779d739 ("openvswitch: Userspace tunneling.")
Reported-at: https://issues.redhat.com/browse/FDP-2986
Signed-off-by: Mike Pattrick <[email protected]>
---
NEWS | 2 +
lib/tnl-neigh-cache.c | 97 ++++++++++++++++++++++++++----
lib/tnl-neigh-cache.h | 2 +-
ofproto/ofproto-dpif-xlate-cache.c | 2 +-
ofproto/ofproto-dpif-xlate.c | 12 +++-
tests/tunnel-push-pop-ipv6.at | 19 +++++-
tests/tunnel-push-pop.at | 15 ++++-
7 files changed, 130 insertions(+), 19 deletions(-)
diff --git a/NEWS b/NEWS
index c3470b84e..809b4f114 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,7 @@
Post-v3.7.0
--------------------
+ - Userspace datapath:
+ * ARP/ND lookups for native tunnel are now rate limited.
v3.7.0 - xx xxx xxxx
diff --git a/lib/tnl-neigh-cache.c b/lib/tnl-neigh-cache.c
index bdff1debc..78da2bc12 100644
--- a/lib/tnl-neigh-cache.c
+++ b/lib/tnl-neigh-cache.c
@@ -47,6 +47,7 @@
#define NEIGH_ENTRY_DEFAULT_IDLE_TIME_MS (15 * 60 * 1000)
#define NEIGH_ENTRY_MAX_AGING_TIME_S 3600
+#define NEIGH_ENTRY_LOOKUP_HOLDOUT_MS 1000
struct tnl_neigh_entry {
struct cmap_node cmap_node;
@@ -54,12 +55,16 @@ struct tnl_neigh_entry {
struct eth_addr mac;
atomic_llong expires; /* Expiration time in ms. */
char br_name[IFNAMSIZ];
+ atomic_bool complete;
};
static struct cmap table = CMAP_INITIALIZER;
static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
static atomic_uint32_t neigh_aging;
+void
+tnl_neigh_set_partial(const char name[IFNAMSIZ], const struct in6_addr *dst);
+
static uint32_t
tnl_neigh_hash(const struct in6_addr *ip)
{
@@ -85,6 +90,16 @@ tnl_neigh_get_aging(void)
return aging;
}
+static bool
+tnl_neigh_is_complete(struct tnl_neigh_entry *neigh)
+{
+ bool complete;
+
+ atomic_read_explicit(&neigh->complete, &complete, memory_order_acquire);
+
+ return complete;
+}
+
static struct tnl_neigh_entry *
tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const struct in6_addr *dst)
{
@@ -98,9 +113,12 @@ tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const
struct in6_addr *dst)
return NULL;
}
- atomic_store_explicit(&neigh->expires, time_msec() +
- tnl_neigh_get_aging(),
- memory_order_release);
+ if (tnl_neigh_is_complete(neigh)) {
+ atomic_store_explicit(&neigh->expires, time_msec() +
+ tnl_neigh_get_aging(),
+ memory_order_release);
+ }
+
return neigh;
}
}
@@ -109,16 +127,24 @@ tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const
struct in6_addr *dst)
int
tnl_neigh_lookup(const char br_name[IFNAMSIZ], const struct in6_addr *dst,
- struct eth_addr *mac)
+ struct eth_addr *mac, bool readonly)
{
struct tnl_neigh_entry *neigh;
int res = ENOENT;
neigh = tnl_neigh_lookup__(br_name, dst);
if (neigh) {
- *mac = neigh->mac;
- res = 0;
+ if (tnl_neigh_is_complete(neigh)) {
+ *mac = neigh->mac;
+ res = 0;
+ } else {
+ res = EINPROGRESS;
+ }
+ } else if (!readonly) {
+ /* Insert a partial entry. */
+ tnl_neigh_set_partial(br_name, dst);
}
+
return res;
}
@@ -142,6 +168,7 @@ tnl_neigh_set(const char name[IFNAMSIZ], const struct
in6_addr *dst,
{
ovs_mutex_lock(&mutex);
struct tnl_neigh_entry *neigh = tnl_neigh_lookup__(name, dst);
+ bool insert = true;
if (neigh) {
if (eth_addr_equals(neigh->mac, mac)) {
atomic_store_relaxed(&neigh->expires, time_msec() +
@@ -149,18 +176,30 @@ tnl_neigh_set(const char name[IFNAMSIZ], const struct
in6_addr *dst,
ovs_mutex_unlock(&mutex);
return;
}
- tnl_neigh_delete(neigh);
+ if (tnl_neigh_is_complete(neigh)) {
+ tnl_neigh_delete(neigh);
+ } else {
+ insert = false;
+ }
}
- seq_change(tnl_conf_seq);
- neigh = xmalloc(sizeof *neigh);
+ if (insert) {
+ neigh = xmalloc(sizeof *neigh);
+
+ neigh->ip = *dst;
+ ovs_strlcpy(neigh->br_name, name, sizeof neigh->br_name);
+ insert = true;
+ }
+ seq_change(tnl_conf_seq);
- neigh->ip = *dst;
neigh->mac = mac;
atomic_store_relaxed(&neigh->expires, time_msec() +
tnl_neigh_get_aging());
- ovs_strlcpy(neigh->br_name, name, sizeof neigh->br_name);
- cmap_insert(&table, &neigh->cmap_node, tnl_neigh_hash(&neigh->ip));
+ atomic_store_relaxed(&neigh->complete, true);
+
+ if (insert) {
+ cmap_insert(&table, &neigh->cmap_node, tnl_neigh_hash(&neigh->ip));
+ }
ovs_mutex_unlock(&mutex);
}
@@ -172,6 +211,37 @@ tnl_arp_set(const char name[IFNAMSIZ], ovs_be32 dst,
tnl_neigh_set(name, &dst6, mac);
}
+void
+tnl_neigh_set_partial(const char name[IFNAMSIZ], const struct in6_addr *dst)
+{
+ ovs_mutex_lock(&mutex);
+ struct tnl_neigh_entry *neigh = tnl_neigh_lookup__(name, dst);
+ if (neigh) {
+ if (!tnl_neigh_is_complete(neigh)) {
+ atomic_store_relaxed(&neigh->expires, time_msec() +
+ NEIGH_ENTRY_LOOKUP_HOLDOUT_MS);
+ goto done;
+ } else if (!tnl_neigh_expired(neigh)) {
+ /* Case where initial lookup did not find a complete entry but it
+ * was inserted before we aquired a lock. */
+ goto done;
+ }
+ tnl_neigh_delete(neigh);
+ }
+ seq_change(tnl_conf_seq);
+
+ neigh = xmalloc(sizeof *neigh);
+
+ neigh->ip = *dst;
+ neigh->complete = false;
+ neigh->expires = time_msec() + NEIGH_ENTRY_LOOKUP_HOLDOUT_MS;
+ ovs_strlcpy(neigh->br_name, name, sizeof neigh->br_name);
+ cmap_insert(&table, &neigh->cmap_node, tnl_neigh_hash(&neigh->ip));
+
+done:
+ ovs_mutex_unlock(&mutex);
+}
+
static int
tnl_arp_snoop(const struct flow *flow, struct flow_wildcards *wc,
const char name[IFNAMSIZ], bool allow_update)
@@ -381,6 +451,9 @@ tnl_neigh_cache_show(struct unixctl_conn *conn, int argc
OVS_UNUSED,
CMAP_FOR_EACH(neigh, cmap_node, &table) {
int start_len, need_ws;
+ if (!tnl_neigh_is_complete(neigh)) {
+ continue;
+ }
start_len = ds.length;
ipv6_format_mapped(&neigh->ip, &ds);
diff --git a/lib/tnl-neigh-cache.h b/lib/tnl-neigh-cache.h
index 877bca312..03bd3e07c 100644
--- a/lib/tnl-neigh-cache.h
+++ b/lib/tnl-neigh-cache.h
@@ -36,7 +36,7 @@ int tnl_neigh_snoop(const struct flow *flow, struct
flow_wildcards *wc,
void tnl_neigh_set(const char name[IFNAMSIZ], const struct in6_addr *dst,
const struct eth_addr mac);
int tnl_neigh_lookup(const char dev_name[IFNAMSIZ], const struct in6_addr *dst,
- struct eth_addr *mac);
+ struct eth_addr *mac, bool readonly);
void tnl_neigh_cache_init(void);
void tnl_neigh_cache_run(void);
void tnl_neigh_flush(const char dev_name[IFNAMSIZ]);
diff --git a/ofproto/ofproto-dpif-xlate-cache.c
b/ofproto/ofproto-dpif-xlate-cache.c
index c6d935cf0..7dda67833 100644
--- a/ofproto/ofproto-dpif-xlate-cache.c
+++ b/ofproto/ofproto-dpif-xlate-cache.c
@@ -152,7 +152,7 @@ xlate_push_stats_entry(struct xc_entry *entry,
case XC_TNL_NEIGH:
/* Lookup neighbor to avoid timeout. */
tnl_neigh_lookup(entry->tnl_neigh_cache.br_name,
- &entry->tnl_neigh_cache.d_ipv6, &dmac);
+ &entry->tnl_neigh_cache.d_ipv6, &dmac, true);
break;
case XC_TUNNEL_HEADER:
if (entry->tunnel_hdr.operation == ADD) {
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index f2db4723b..de068d6f4 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -72,6 +72,7 @@
COVERAGE_DEFINE(xlate_actions);
COVERAGE_DEFINE(xlate_actions_oversize);
COVERAGE_DEFINE(xlate_actions_too_many_output);
+COVERAGE_DEFINE(xlate_actions_sent_packet);
VLOG_DEFINE_THIS_MODULE(ofproto_dpif_xlate);
@@ -3931,17 +3932,26 @@ native_tunnel_output(struct xlate_ctx *ctx, const
struct xport *xport,
s_ip = in6_addr_get_mapped_ipv4(&s_ip6);
}
- err = tnl_neigh_lookup(out_dev->xbridge->name, &d_ip6, &dmac);
+ err = tnl_neigh_lookup(out_dev->xbridge->name, &d_ip6, &dmac, false);
if (err) {
struct in6_addr nh_s_ip6 = in6addr_any;
put_cloned_drop_action(ctx->xbridge->ofproto, ctx->odp_actions,
XLATE_TUNNEL_NEIGH_CACHE_MISS,
!is_last_action);
+ if (err == EINPROGRESS) {
+ xlate_report(ctx, OFT_DETAIL,
+ "neighbor cache miss for %s on bridge %s, "
+ "waiting on %s request",
+ buf_dip6, out_dev->xbridge->name,
+ d_ip ? "ARP" : "ND");
+ return err;
+ }
xlate_report(ctx, OFT_DETAIL,
"neighbor cache miss for %s on bridge %s, "
"sending %s request",
buf_dip6, out_dev->xbridge->name, d_ip ? "ARP" : "ND");
+ COVERAGE_INC(xlate_actions_sent_packet);
err = ovs_router_get_netdev_source_address(
&d_ip6, netdev_get_name(out_dev->netdev), &nh_s_ip6);
diff --git a/tests/tunnel-push-pop-ipv6.at b/tests/tunnel-push-pop-ipv6.at
index b11387345..7263bf4f0 100644
--- a/tests/tunnel-push-pop-ipv6.at
+++ b/tests/tunnel-push-pop-ipv6.at
@@ -344,7 +344,20 @@ AT_CHECK([ovs-ofctl add-flow br0 action=normal])
dnl Check Neighbour discovery.
AT_CHECK([ovs-vsctl -- set Interface p0 options:pcap=p0.pcap])
-AT_CHECK([ovs-appctl netdev-dummy/receive int-br
'in_port(2),eth(src=aa:55:aa:55:00:00,dst=f8:bc:12:ff:ff:ff),eth_type(0x0800),ipv4(src=1.1.3.92,dst=1.1.3.88,proto=1,tos=0,ttl=64,frag=no),icmp(type=0,code=0)'])
+dnl First trace should send only two ND lookups, for each destination IP
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "in_port(int-br),]dnl
+ [eth(src=aa:55:aa:55:00:00,dst=f8:bc:12:ff:ff:ff),eth_type(0x0800),]dnl
+ [ipv4(src=1.1.3.92,dst=1.1.3.88,proto=1,tos=0,ttl=64,frag=no),]dnl
+ [icmp(type=0,code=0)"], [0], [stdout])
+AT_CHECK([grep -q "sending ND request" stdout], [0])
+
+dnl Second trace should not send any ND lookups
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "in_port(int-br),]dnl
+ [eth(src=aa:55:aa:55:00:00,dst=f8:bc:12:ff:ff:ff),eth_type(0x0800),]dnl
+ [ipv4(src=1.1.3.92,dst=1.1.3.88,proto=1,tos=0,ttl=64,frag=no),]dnl
+ [icmp(type=0,code=0)"], [0], [stdout])
+AT_CHECK([grep -q "waiting on ND request" stdout], [0])
+AT_CHECK([grep -qv "sending ND request" stdout], [0])
dnl Wait for the two Neighbor Solicitation packets to be sent.
dnl Sometimes the system can be slow (e.g. under valgrind)
@@ -577,8 +590,8 @@
icmp,vlan_tci=0x0000,dl_src=be:b6:f4:e1:49:4a,dl_dst=fe:71:d8:83:72:4f,nw_src=30
AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port 5'], [0], [dnl
port 5: rx pkts=1, bytes=98, drop=?, errs=?, frame=?, over=?, crc=?
])
-AT_CHECK([ovs-appctl dpif/dump-flows int-br | grep 'in_port(6081)'], [0], [dnl
-recirc_id(0),tunnel(tun_id=0x7b,ipv6_src=2001:cafe::92,ipv6_dst=2001:cafe::88,geneve({class=0xffff,type=0x80,len=4,0xa/0xf}{class=0xffff,type=0,len=4}),flags(+key)),in_port(6081),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no),
packets:0, bytes:0, used:never,
actions:userspace(pid=0,controller(reason=1,dont_send=0,continuation=0,recirc_id=3,rule_cookie=0,controller_id=0,max_len=65535))
+AT_CHECK([ovs-appctl dpif/dump-flows int-br | grep 'in_port(6081)' |
strip_recirc], [0], [dnl
+recirc_id(<recirc>),tunnel(tun_id=0x7b,ipv6_src=2001:cafe::92,ipv6_dst=2001:cafe::88,geneve({class=0xffff,type=0x80,len=4,0xa/0xf}{class=0xffff,type=0,len=4}),flags(+key)),in_port(6081),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no),
packets:0, bytes:0, used:never,
actions:userspace(pid=0,controller(reason=1,dont_send=0,continuation=0,recirc_id=<recirc>,rule_cookie=0,controller_id=0,max_len=65535))
])
dnl Receive VXLAN with different MAC and verify that the neigh cache gets
updated
diff --git a/tests/tunnel-push-pop.at b/tests/tunnel-push-pop.at
index f22a37570..e7211149b 100644
--- a/tests/tunnel-push-pop.at
+++ b/tests/tunnel-push-pop.at
@@ -256,7 +256,20 @@ AT_CHECK([ovs-appctl revalidator/wait])
dnl Check ARP request
AT_CHECK([ovs-vsctl -- set Interface p0 options:pcap=p0.pcap])
-AT_CHECK([ovs-appctl netdev-dummy/receive int-br
'in_port(2),eth(src=aa:55:aa:55:00:00,dst=f8:bc:12:ff:ff:ff),eth_type(0x0800),ipv4(src=1.1.3.92,dst=1.1.3.88,proto=1,tos=0,ttl=64,frag=no),icmp(type=0,code=0)'])
+dnl First trace should send only two ARP lookups, for each destination IP
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "in_port(int-br),]dnl
+ [eth(src=aa:55:aa:55:00:00,dst=f8:bc:12:ff:ff:ff),eth_type(0x0800),]dnl
+ [ipv4(src=1.1.3.92,dst=1.1.3.88,proto=1,tos=0,ttl=64,frag=no),]dnl
+ [icmp(type=0,code=0)"], [0], [stdout])
+AT_CHECK([grep -q "sending ARP request" stdout], [0])
+
+dnl Second trace should not send any ARP lookups
+AT_CHECK([[ovs-appctl ofproto/trace ovs-dummy "in_port(int-br),]dnl
+ [eth(src=aa:55:aa:55:00:00,dst=f8:bc:12:ff:ff:ff),eth_type(0x0800),]dnl
+ [ipv4(src=1.1.3.92,dst=1.1.3.88,proto=1,tos=0,ttl=64,frag=no),]dnl
+ [icmp(type=0,code=0)"]], [0], [stdout])
+AT_CHECK([grep -q "waiting on ARP request" stdout], [0])
+AT_CHECK([grep -qv "sending ARP request" stdout], [0])
dnl Wait for the two ARP requests to be sent. Sometimes the system
dnl can be slow (e.g. under valgrind)
--
2.52.0
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev