Currently there is no rate limit on tunnel hardware address ARP/ND
lookups. Furthermote, there is no indication for how frequently action
translation composes these packets.

This patch implements a limit of generating one packet per destination
per second and adds a counter for each time a lookup happens.

Fixes: a36de779d739 ("openvswitch: Userspace tunneling.")
Reported-at: https://issues.redhat.com/browse/FDP-2986
Signed-off-by: Mike Pattrick <[email protected]>
---
 NEWS                               |  2 +
 lib/tnl-neigh-cache.c              | 97 ++++++++++++++++++++++++++----
 lib/tnl-neigh-cache.h              |  2 +-
 ofproto/ofproto-dpif-xlate-cache.c |  2 +-
 ofproto/ofproto-dpif-xlate.c       | 12 +++-
 tests/tunnel-push-pop-ipv6.at      | 19 +++++-
 tests/tunnel-push-pop.at           | 15 ++++-
 7 files changed, 130 insertions(+), 19 deletions(-)

diff --git a/NEWS b/NEWS
index c3470b84e..809b4f114 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,7 @@
 Post-v3.7.0
 --------------------
+   - Userspace datapath:
+     * ARP/ND lookups for native tunnel are now rate limited.
 
 
 v3.7.0 - xx xxx xxxx
diff --git a/lib/tnl-neigh-cache.c b/lib/tnl-neigh-cache.c
index bdff1debc..78da2bc12 100644
--- a/lib/tnl-neigh-cache.c
+++ b/lib/tnl-neigh-cache.c
@@ -47,6 +47,7 @@
 
 #define NEIGH_ENTRY_DEFAULT_IDLE_TIME_MS  (15 * 60 * 1000)
 #define NEIGH_ENTRY_MAX_AGING_TIME_S  3600
+#define NEIGH_ENTRY_LOOKUP_HOLDOUT_MS   1000
 
 struct tnl_neigh_entry {
     struct cmap_node cmap_node;
@@ -54,12 +55,16 @@ struct tnl_neigh_entry {
     struct eth_addr mac;
     atomic_llong expires;       /* Expiration time in ms. */
     char br_name[IFNAMSIZ];
+    atomic_bool complete;
 };
 
 static struct cmap table = CMAP_INITIALIZER;
 static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
 static atomic_uint32_t neigh_aging;
 
+void
+tnl_neigh_set_partial(const char name[IFNAMSIZ], const struct in6_addr *dst);
+
 static uint32_t
 tnl_neigh_hash(const struct in6_addr *ip)
 {
@@ -85,6 +90,16 @@ tnl_neigh_get_aging(void)
     return aging;
 }
 
+static bool
+tnl_neigh_is_complete(struct tnl_neigh_entry *neigh)
+{
+    bool complete;
+
+    atomic_read_explicit(&neigh->complete, &complete, memory_order_acquire);
+
+    return complete;
+}
+
 static struct tnl_neigh_entry *
 tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const struct in6_addr *dst)
 {
@@ -98,9 +113,12 @@ tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const 
struct in6_addr *dst)
                 return NULL;
             }
 
-            atomic_store_explicit(&neigh->expires, time_msec() +
-                                  tnl_neigh_get_aging(),
-                                  memory_order_release);
+            if (tnl_neigh_is_complete(neigh)) {
+                atomic_store_explicit(&neigh->expires, time_msec() +
+                                      tnl_neigh_get_aging(),
+                                      memory_order_release);
+            }
+
             return neigh;
         }
     }
@@ -109,16 +127,24 @@ tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const 
struct in6_addr *dst)
 
 int
 tnl_neigh_lookup(const char br_name[IFNAMSIZ], const struct in6_addr *dst,
-                 struct eth_addr *mac)
+                 struct eth_addr *mac, bool readonly)
 {
     struct tnl_neigh_entry *neigh;
     int res = ENOENT;
 
     neigh = tnl_neigh_lookup__(br_name, dst);
     if (neigh) {
-        *mac = neigh->mac;
-        res = 0;
+        if (tnl_neigh_is_complete(neigh)) {
+            *mac = neigh->mac;
+            res = 0;
+        } else {
+            res = EINPROGRESS;
+        }
+    } else if (!readonly) {
+        /* Insert a partial entry. */
+        tnl_neigh_set_partial(br_name, dst);
     }
+
     return res;
 }
 
@@ -142,6 +168,7 @@ tnl_neigh_set(const char name[IFNAMSIZ], const struct 
in6_addr *dst,
 {
     ovs_mutex_lock(&mutex);
     struct tnl_neigh_entry *neigh = tnl_neigh_lookup__(name, dst);
+    bool insert = true;
     if (neigh) {
         if (eth_addr_equals(neigh->mac, mac)) {
             atomic_store_relaxed(&neigh->expires, time_msec() +
@@ -149,18 +176,30 @@ tnl_neigh_set(const char name[IFNAMSIZ], const struct 
in6_addr *dst,
             ovs_mutex_unlock(&mutex);
             return;
         }
-        tnl_neigh_delete(neigh);
+        if (tnl_neigh_is_complete(neigh)) {
+            tnl_neigh_delete(neigh);
+        } else {
+            insert = false;
+        }
     }
-    seq_change(tnl_conf_seq);
 
-    neigh = xmalloc(sizeof *neigh);
+    if (insert) {
+        neigh = xmalloc(sizeof *neigh);
+
+        neigh->ip = *dst;
+        ovs_strlcpy(neigh->br_name, name, sizeof neigh->br_name);
+        insert = true;
+    }
+    seq_change(tnl_conf_seq);
 
-    neigh->ip = *dst;
     neigh->mac = mac;
     atomic_store_relaxed(&neigh->expires, time_msec() +
                          tnl_neigh_get_aging());
-    ovs_strlcpy(neigh->br_name, name, sizeof neigh->br_name);
-    cmap_insert(&table, &neigh->cmap_node, tnl_neigh_hash(&neigh->ip));
+    atomic_store_relaxed(&neigh->complete, true);
+
+    if (insert) {
+        cmap_insert(&table, &neigh->cmap_node, tnl_neigh_hash(&neigh->ip));
+    }
     ovs_mutex_unlock(&mutex);
 }
 
@@ -172,6 +211,37 @@ tnl_arp_set(const char name[IFNAMSIZ], ovs_be32 dst,
     tnl_neigh_set(name, &dst6, mac);
 }
 
+void
+tnl_neigh_set_partial(const char name[IFNAMSIZ], const struct in6_addr *dst)
+{
+    ovs_mutex_lock(&mutex);
+    struct tnl_neigh_entry *neigh = tnl_neigh_lookup__(name, dst);
+    if (neigh) {
+        if (!tnl_neigh_is_complete(neigh)) {
+            atomic_store_relaxed(&neigh->expires, time_msec() +
+                                 NEIGH_ENTRY_LOOKUP_HOLDOUT_MS);
+            goto done;
+        } else if (!tnl_neigh_expired(neigh)) {
+            /* Case where initial lookup did not find a complete entry but it
+             * was inserted before we aquired a lock. */
+            goto done;
+        }
+        tnl_neigh_delete(neigh);
+    }
+    seq_change(tnl_conf_seq);
+
+    neigh = xmalloc(sizeof *neigh);
+
+    neigh->ip = *dst;
+    neigh->complete = false;
+    neigh->expires = time_msec() + NEIGH_ENTRY_LOOKUP_HOLDOUT_MS;
+    ovs_strlcpy(neigh->br_name, name, sizeof neigh->br_name);
+    cmap_insert(&table, &neigh->cmap_node, tnl_neigh_hash(&neigh->ip));
+
+done:
+    ovs_mutex_unlock(&mutex);
+}
+
 static int
 tnl_arp_snoop(const struct flow *flow, struct flow_wildcards *wc,
               const char name[IFNAMSIZ], bool allow_update)
@@ -381,6 +451,9 @@ tnl_neigh_cache_show(struct unixctl_conn *conn, int argc 
OVS_UNUSED,
     CMAP_FOR_EACH(neigh, cmap_node, &table) {
         int start_len, need_ws;
 
+        if (!tnl_neigh_is_complete(neigh)) {
+            continue;
+        }
         start_len = ds.length;
         ipv6_format_mapped(&neigh->ip, &ds);
 
diff --git a/lib/tnl-neigh-cache.h b/lib/tnl-neigh-cache.h
index 877bca312..03bd3e07c 100644
--- a/lib/tnl-neigh-cache.h
+++ b/lib/tnl-neigh-cache.h
@@ -36,7 +36,7 @@ int tnl_neigh_snoop(const struct flow *flow, struct 
flow_wildcards *wc,
 void tnl_neigh_set(const char name[IFNAMSIZ], const struct in6_addr *dst,
                    const struct eth_addr mac);
 int tnl_neigh_lookup(const char dev_name[IFNAMSIZ], const struct in6_addr *dst,
-                     struct eth_addr *mac);
+                     struct eth_addr *mac, bool readonly);
 void tnl_neigh_cache_init(void);
 void tnl_neigh_cache_run(void);
 void tnl_neigh_flush(const char dev_name[IFNAMSIZ]);
diff --git a/ofproto/ofproto-dpif-xlate-cache.c 
b/ofproto/ofproto-dpif-xlate-cache.c
index c6d935cf0..7dda67833 100644
--- a/ofproto/ofproto-dpif-xlate-cache.c
+++ b/ofproto/ofproto-dpif-xlate-cache.c
@@ -152,7 +152,7 @@ xlate_push_stats_entry(struct xc_entry *entry,
     case XC_TNL_NEIGH:
         /* Lookup neighbor to avoid timeout. */
         tnl_neigh_lookup(entry->tnl_neigh_cache.br_name,
-                         &entry->tnl_neigh_cache.d_ipv6, &dmac);
+                         &entry->tnl_neigh_cache.d_ipv6, &dmac, true);
         break;
     case XC_TUNNEL_HEADER:
         if (entry->tunnel_hdr.operation == ADD) {
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index f2db4723b..de068d6f4 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -72,6 +72,7 @@
 COVERAGE_DEFINE(xlate_actions);
 COVERAGE_DEFINE(xlate_actions_oversize);
 COVERAGE_DEFINE(xlate_actions_too_many_output);
+COVERAGE_DEFINE(xlate_actions_sent_packet);
 
 VLOG_DEFINE_THIS_MODULE(ofproto_dpif_xlate);
 
@@ -3931,17 +3932,26 @@ native_tunnel_output(struct xlate_ctx *ctx, const 
struct xport *xport,
         s_ip = in6_addr_get_mapped_ipv4(&s_ip6);
     }
 
-    err = tnl_neigh_lookup(out_dev->xbridge->name, &d_ip6, &dmac);
+    err = tnl_neigh_lookup(out_dev->xbridge->name, &d_ip6, &dmac, false);
     if (err) {
         struct in6_addr nh_s_ip6 = in6addr_any;
 
         put_cloned_drop_action(ctx->xbridge->ofproto, ctx->odp_actions,
                                XLATE_TUNNEL_NEIGH_CACHE_MISS,
                                !is_last_action);
+        if (err == EINPROGRESS) {
+            xlate_report(ctx, OFT_DETAIL,
+                         "neighbor cache miss for %s on bridge %s, "
+                         "waiting on %s request",
+                         buf_dip6, out_dev->xbridge->name,
+                         d_ip ? "ARP" : "ND");
+            return err;
+        }
         xlate_report(ctx, OFT_DETAIL,
                      "neighbor cache miss for %s on bridge %s, "
                      "sending %s request",
                      buf_dip6, out_dev->xbridge->name, d_ip ? "ARP" : "ND");
+        COVERAGE_INC(xlate_actions_sent_packet);
 
         err = ovs_router_get_netdev_source_address(
             &d_ip6, netdev_get_name(out_dev->netdev), &nh_s_ip6);
diff --git a/tests/tunnel-push-pop-ipv6.at b/tests/tunnel-push-pop-ipv6.at
index b11387345..7263bf4f0 100644
--- a/tests/tunnel-push-pop-ipv6.at
+++ b/tests/tunnel-push-pop-ipv6.at
@@ -344,7 +344,20 @@ AT_CHECK([ovs-ofctl add-flow br0 action=normal])
 dnl Check Neighbour discovery.
 AT_CHECK([ovs-vsctl -- set Interface p0 options:pcap=p0.pcap])
 
-AT_CHECK([ovs-appctl netdev-dummy/receive int-br 
'in_port(2),eth(src=aa:55:aa:55:00:00,dst=f8:bc:12:ff:ff:ff),eth_type(0x0800),ipv4(src=1.1.3.92,dst=1.1.3.88,proto=1,tos=0,ttl=64,frag=no),icmp(type=0,code=0)'])
+dnl First trace should send only two ND lookups, for each destination IP
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "in_port(int-br),]dnl
+  [eth(src=aa:55:aa:55:00:00,dst=f8:bc:12:ff:ff:ff),eth_type(0x0800),]dnl
+  [ipv4(src=1.1.3.92,dst=1.1.3.88,proto=1,tos=0,ttl=64,frag=no),]dnl
+  [icmp(type=0,code=0)"], [0], [stdout])
+AT_CHECK([grep -q "sending ND request" stdout], [0])
+
+dnl Second trace should not send any ND lookups
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "in_port(int-br),]dnl
+  [eth(src=aa:55:aa:55:00:00,dst=f8:bc:12:ff:ff:ff),eth_type(0x0800),]dnl
+  [ipv4(src=1.1.3.92,dst=1.1.3.88,proto=1,tos=0,ttl=64,frag=no),]dnl
+  [icmp(type=0,code=0)"], [0], [stdout])
+AT_CHECK([grep -q "waiting on ND request" stdout], [0])
+AT_CHECK([grep -qv "sending ND request" stdout], [0])
 
 dnl Wait for the two Neighbor Solicitation packets to be sent.
 dnl Sometimes the system can be slow (e.g. under valgrind)
@@ -577,8 +590,8 @@ 
icmp,vlan_tci=0x0000,dl_src=be:b6:f4:e1:49:4a,dl_dst=fe:71:d8:83:72:4f,nw_src=30
 AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port  5'], [0], [dnl
   port  5: rx pkts=1, bytes=98, drop=?, errs=?, frame=?, over=?, crc=?
 ])
-AT_CHECK([ovs-appctl dpif/dump-flows int-br | grep 'in_port(6081)'], [0], [dnl
-recirc_id(0),tunnel(tun_id=0x7b,ipv6_src=2001:cafe::92,ipv6_dst=2001:cafe::88,geneve({class=0xffff,type=0x80,len=4,0xa/0xf}{class=0xffff,type=0,len=4}),flags(+key)),in_port(6081),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no),
 packets:0, bytes:0, used:never, 
actions:userspace(pid=0,controller(reason=1,dont_send=0,continuation=0,recirc_id=3,rule_cookie=0,controller_id=0,max_len=65535))
+AT_CHECK([ovs-appctl dpif/dump-flows int-br | grep 'in_port(6081)' | 
strip_recirc], [0], [dnl
+recirc_id(<recirc>),tunnel(tun_id=0x7b,ipv6_src=2001:cafe::92,ipv6_dst=2001:cafe::88,geneve({class=0xffff,type=0x80,len=4,0xa/0xf}{class=0xffff,type=0,len=4}),flags(+key)),in_port(6081),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no),
 packets:0, bytes:0, used:never, 
actions:userspace(pid=0,controller(reason=1,dont_send=0,continuation=0,recirc_id=<recirc>,rule_cookie=0,controller_id=0,max_len=65535))
 ])
 
 dnl Receive VXLAN with different MAC and verify that the neigh cache gets 
updated
diff --git a/tests/tunnel-push-pop.at b/tests/tunnel-push-pop.at
index f22a37570..e7211149b 100644
--- a/tests/tunnel-push-pop.at
+++ b/tests/tunnel-push-pop.at
@@ -256,7 +256,20 @@ AT_CHECK([ovs-appctl revalidator/wait])
 dnl Check ARP request
 AT_CHECK([ovs-vsctl -- set Interface p0 options:pcap=p0.pcap])
 
-AT_CHECK([ovs-appctl netdev-dummy/receive int-br 
'in_port(2),eth(src=aa:55:aa:55:00:00,dst=f8:bc:12:ff:ff:ff),eth_type(0x0800),ipv4(src=1.1.3.92,dst=1.1.3.88,proto=1,tos=0,ttl=64,frag=no),icmp(type=0,code=0)'])
+dnl First trace should send only two ARP lookups, for each destination IP
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "in_port(int-br),]dnl
+  [eth(src=aa:55:aa:55:00:00,dst=f8:bc:12:ff:ff:ff),eth_type(0x0800),]dnl
+  [ipv4(src=1.1.3.92,dst=1.1.3.88,proto=1,tos=0,ttl=64,frag=no),]dnl
+  [icmp(type=0,code=0)"], [0], [stdout])
+AT_CHECK([grep -q "sending ARP request" stdout], [0])
+
+dnl Second trace should not send any ARP lookups
+AT_CHECK([[ovs-appctl ofproto/trace ovs-dummy "in_port(int-br),]dnl
+  [eth(src=aa:55:aa:55:00:00,dst=f8:bc:12:ff:ff:ff),eth_type(0x0800),]dnl
+  [ipv4(src=1.1.3.92,dst=1.1.3.88,proto=1,tos=0,ttl=64,frag=no),]dnl
+  [icmp(type=0,code=0)"]], [0], [stdout])
+AT_CHECK([grep -q "waiting on ARP request" stdout], [0])
+AT_CHECK([grep -qv "sending ARP request" stdout], [0])
 
 dnl Wait for the two ARP requests to be sent. Sometimes the system
 dnl can be slow (e.g. under valgrind)
-- 
2.52.0

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to