Hi Mike

Mike Pattrick via dev <[email protected]> writes:

> Currently there is no rate limit on tunnel hardware address ARP/ND
> lookups. Furthermote, there is no indication for how frequently action
> translation composes these packets.
>
> This patch implements a limit of generating one packet per destination
> per second and adds a counter for each time a lookup happens.
>
> Fixes: a36de779d739 ("openvswitch: Userspace tunneling.")
> Reported-at: https://issues.redhat.com/browse/FDP-2986
> Signed-off-by: Mike Pattrick <[email protected]>
> ---
>  NEWS                               |  2 +
>  lib/tnl-neigh-cache.c              | 97 ++++++++++++++++++++++++++----
>  lib/tnl-neigh-cache.h              |  2 +-
>  ofproto/ofproto-dpif-xlate-cache.c |  2 +-
>  ofproto/ofproto-dpif-xlate.c       | 12 +++-
>  tests/tunnel-push-pop-ipv6.at      | 19 +++++-
>  tests/tunnel-push-pop.at           | 15 ++++-
>  7 files changed, 130 insertions(+), 19 deletions(-)
>
> diff --git a/NEWS b/NEWS
> index c3470b84e..809b4f114 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -1,5 +1,7 @@
>  Post-v3.7.0
>  --------------------
> +   - Userspace datapath:
> +     * ARP/ND lookups for native tunnel are now rate limited.

Should this rate limit also be configurable by the user?  Or even
something that could be disabled (like '0' value of rate limit)?  I
worry that some setups may have come to rely on the arp lookup behavior,
and will not function properly.  WDYT?

I didn't review the ordering of the tnl complete logic for any kind of
weird cases, but I plan to do that next week.

>  
>  v3.7.0 - xx xxx xxxx
> diff --git a/lib/tnl-neigh-cache.c b/lib/tnl-neigh-cache.c
> index bdff1debc..78da2bc12 100644
> --- a/lib/tnl-neigh-cache.c
> +++ b/lib/tnl-neigh-cache.c
> @@ -47,6 +47,7 @@
>  
>  #define NEIGH_ENTRY_DEFAULT_IDLE_TIME_MS  (15 * 60 * 1000)
>  #define NEIGH_ENTRY_MAX_AGING_TIME_S  3600
> +#define NEIGH_ENTRY_LOOKUP_HOLDOUT_MS   1000
>  
>  struct tnl_neigh_entry {
>      struct cmap_node cmap_node;
> @@ -54,12 +55,16 @@ struct tnl_neigh_entry {
>      struct eth_addr mac;
>      atomic_llong expires;       /* Expiration time in ms. */
>      char br_name[IFNAMSIZ];
> +    atomic_bool complete;
>  };
>  
>  static struct cmap table = CMAP_INITIALIZER;
>  static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
>  static atomic_uint32_t neigh_aging;
>  
> +void
> +tnl_neigh_set_partial(const char name[IFNAMSIZ], const struct in6_addr *dst);
> +
>  static uint32_t
>  tnl_neigh_hash(const struct in6_addr *ip)
>  {
> @@ -85,6 +90,16 @@ tnl_neigh_get_aging(void)
>      return aging;
>  }
>  
> +static bool
> +tnl_neigh_is_complete(struct tnl_neigh_entry *neigh)
> +{
> +    bool complete;
> +
> +    atomic_read_explicit(&neigh->complete, &complete, memory_order_acquire);
> +
> +    return complete;
> +}
> +
>  static struct tnl_neigh_entry *
>  tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const struct in6_addr *dst)
>  {
> @@ -98,9 +113,12 @@ tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const 
> struct in6_addr *dst)
>                  return NULL;
>              }
>  
> -            atomic_store_explicit(&neigh->expires, time_msec() +
> -                                  tnl_neigh_get_aging(),
> -                                  memory_order_release);
> +            if (tnl_neigh_is_complete(neigh)) {
> +                atomic_store_explicit(&neigh->expires, time_msec() +
> +                                      tnl_neigh_get_aging(),
> +                                      memory_order_release);
> +            }
> +
>              return neigh;
>          }
>      }
> @@ -109,16 +127,24 @@ tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const 
> struct in6_addr *dst)
>  
>  int
>  tnl_neigh_lookup(const char br_name[IFNAMSIZ], const struct in6_addr *dst,
> -                 struct eth_addr *mac)
> +                 struct eth_addr *mac, bool readonly)
>  {
>      struct tnl_neigh_entry *neigh;
>      int res = ENOENT;
>  
>      neigh = tnl_neigh_lookup__(br_name, dst);
>      if (neigh) {
> -        *mac = neigh->mac;
> -        res = 0;
> +        if (tnl_neigh_is_complete(neigh)) {
> +            *mac = neigh->mac;
> +            res = 0;
> +        } else {
> +            res = EINPROGRESS;
> +        }
> +    } else if (!readonly) {
> +        /* Insert a partial entry. */
> +        tnl_neigh_set_partial(br_name, dst);
>      }
> +
>      return res;
>  }
>  
> @@ -142,6 +168,7 @@ tnl_neigh_set(const char name[IFNAMSIZ], const struct 
> in6_addr *dst,
>  {
>      ovs_mutex_lock(&mutex);
>      struct tnl_neigh_entry *neigh = tnl_neigh_lookup__(name, dst);
> +    bool insert = true;
>      if (neigh) {
>          if (eth_addr_equals(neigh->mac, mac)) {
>              atomic_store_relaxed(&neigh->expires, time_msec() +
> @@ -149,18 +176,30 @@ tnl_neigh_set(const char name[IFNAMSIZ], const struct 
> in6_addr *dst,
>              ovs_mutex_unlock(&mutex);
>              return;
>          }
> -        tnl_neigh_delete(neigh);
> +        if (tnl_neigh_is_complete(neigh)) {
> +            tnl_neigh_delete(neigh);
> +        } else {
> +            insert = false;
> +        }
>      }
> -    seq_change(tnl_conf_seq);
>  
> -    neigh = xmalloc(sizeof *neigh);
> +    if (insert) {
> +        neigh = xmalloc(sizeof *neigh);
> +
> +        neigh->ip = *dst;
> +        ovs_strlcpy(neigh->br_name, name, sizeof neigh->br_name);
> +        insert = true;
> +    }
> +    seq_change(tnl_conf_seq);
>  
> -    neigh->ip = *dst;
>      neigh->mac = mac;
>      atomic_store_relaxed(&neigh->expires, time_msec() +
>                           tnl_neigh_get_aging());
> -    ovs_strlcpy(neigh->br_name, name, sizeof neigh->br_name);
> -    cmap_insert(&table, &neigh->cmap_node, tnl_neigh_hash(&neigh->ip));
> +    atomic_store_relaxed(&neigh->complete, true);
> +
> +    if (insert) {
> +        cmap_insert(&table, &neigh->cmap_node, tnl_neigh_hash(&neigh->ip));
> +    }
>      ovs_mutex_unlock(&mutex);
>  }
>  
> @@ -172,6 +211,37 @@ tnl_arp_set(const char name[IFNAMSIZ], ovs_be32 dst,
>      tnl_neigh_set(name, &dst6, mac);
>  }
>  
> +void
> +tnl_neigh_set_partial(const char name[IFNAMSIZ], const struct in6_addr *dst)
> +{
> +    ovs_mutex_lock(&mutex);
> +    struct tnl_neigh_entry *neigh = tnl_neigh_lookup__(name, dst);
> +    if (neigh) {
> +        if (!tnl_neigh_is_complete(neigh)) {
> +            atomic_store_relaxed(&neigh->expires, time_msec() +
> +                                 NEIGH_ENTRY_LOOKUP_HOLDOUT_MS);
> +            goto done;
> +        } else if (!tnl_neigh_expired(neigh)) {
> +            /* Case where initial lookup did not find a complete entry but it
> +             * was inserted before we aquired a lock. */
> +            goto done;
> +        }
> +        tnl_neigh_delete(neigh);
> +    }
> +    seq_change(tnl_conf_seq);
> +
> +    neigh = xmalloc(sizeof *neigh);
> +
> +    neigh->ip = *dst;
> +    neigh->complete = false;
> +    neigh->expires = time_msec() + NEIGH_ENTRY_LOOKUP_HOLDOUT_MS;
> +    ovs_strlcpy(neigh->br_name, name, sizeof neigh->br_name);
> +    cmap_insert(&table, &neigh->cmap_node, tnl_neigh_hash(&neigh->ip));
> +
> +done:
> +    ovs_mutex_unlock(&mutex);
> +}
> +
>  static int
>  tnl_arp_snoop(const struct flow *flow, struct flow_wildcards *wc,
>                const char name[IFNAMSIZ], bool allow_update)
> @@ -381,6 +451,9 @@ tnl_neigh_cache_show(struct unixctl_conn *conn, int argc 
> OVS_UNUSED,
>      CMAP_FOR_EACH(neigh, cmap_node, &table) {
>          int start_len, need_ws;
>  
> +        if (!tnl_neigh_is_complete(neigh)) {
> +            continue;
> +        }
>          start_len = ds.length;
>          ipv6_format_mapped(&neigh->ip, &ds);
>  
> diff --git a/lib/tnl-neigh-cache.h b/lib/tnl-neigh-cache.h
> index 877bca312..03bd3e07c 100644
> --- a/lib/tnl-neigh-cache.h
> +++ b/lib/tnl-neigh-cache.h
> @@ -36,7 +36,7 @@ int tnl_neigh_snoop(const struct flow *flow, struct 
> flow_wildcards *wc,
>  void tnl_neigh_set(const char name[IFNAMSIZ], const struct in6_addr *dst,
>                     const struct eth_addr mac);
>  int tnl_neigh_lookup(const char dev_name[IFNAMSIZ], const struct in6_addr 
> *dst,
> -                     struct eth_addr *mac);
> +                     struct eth_addr *mac, bool readonly);
>  void tnl_neigh_cache_init(void);
>  void tnl_neigh_cache_run(void);
>  void tnl_neigh_flush(const char dev_name[IFNAMSIZ]);
> diff --git a/ofproto/ofproto-dpif-xlate-cache.c 
> b/ofproto/ofproto-dpif-xlate-cache.c
> index c6d935cf0..7dda67833 100644
> --- a/ofproto/ofproto-dpif-xlate-cache.c
> +++ b/ofproto/ofproto-dpif-xlate-cache.c
> @@ -152,7 +152,7 @@ xlate_push_stats_entry(struct xc_entry *entry,
>      case XC_TNL_NEIGH:
>          /* Lookup neighbor to avoid timeout. */
>          tnl_neigh_lookup(entry->tnl_neigh_cache.br_name,
> -                         &entry->tnl_neigh_cache.d_ipv6, &dmac);
> +                         &entry->tnl_neigh_cache.d_ipv6, &dmac, true);
>          break;
>      case XC_TUNNEL_HEADER:
>          if (entry->tunnel_hdr.operation == ADD) {
> diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
> index f2db4723b..de068d6f4 100644
> --- a/ofproto/ofproto-dpif-xlate.c
> +++ b/ofproto/ofproto-dpif-xlate.c
> @@ -72,6 +72,7 @@
>  COVERAGE_DEFINE(xlate_actions);
>  COVERAGE_DEFINE(xlate_actions_oversize);
>  COVERAGE_DEFINE(xlate_actions_too_many_output);
> +COVERAGE_DEFINE(xlate_actions_sent_packet);
>  
>  VLOG_DEFINE_THIS_MODULE(ofproto_dpif_xlate);
>  
> @@ -3931,17 +3932,26 @@ native_tunnel_output(struct xlate_ctx *ctx, const 
> struct xport *xport,
>          s_ip = in6_addr_get_mapped_ipv4(&s_ip6);
>      }
>  
> -    err = tnl_neigh_lookup(out_dev->xbridge->name, &d_ip6, &dmac);
> +    err = tnl_neigh_lookup(out_dev->xbridge->name, &d_ip6, &dmac, false);
>      if (err) {
>          struct in6_addr nh_s_ip6 = in6addr_any;
>  
>          put_cloned_drop_action(ctx->xbridge->ofproto, ctx->odp_actions,
>                                 XLATE_TUNNEL_NEIGH_CACHE_MISS,
>                                 !is_last_action);
> +        if (err == EINPROGRESS) {
> +            xlate_report(ctx, OFT_DETAIL,
> +                         "neighbor cache miss for %s on bridge %s, "
> +                         "waiting on %s request",
> +                         buf_dip6, out_dev->xbridge->name,
> +                         d_ip ? "ARP" : "ND");
> +            return err;
> +        }
>          xlate_report(ctx, OFT_DETAIL,
>                       "neighbor cache miss for %s on bridge %s, "
>                       "sending %s request",
>                       buf_dip6, out_dev->xbridge->name, d_ip ? "ARP" : "ND");
> +        COVERAGE_INC(xlate_actions_sent_packet);
>  
>          err = ovs_router_get_netdev_source_address(
>              &d_ip6, netdev_get_name(out_dev->netdev), &nh_s_ip6);
> diff --git a/tests/tunnel-push-pop-ipv6.at b/tests/tunnel-push-pop-ipv6.at
> index b11387345..7263bf4f0 100644
> --- a/tests/tunnel-push-pop-ipv6.at
> +++ b/tests/tunnel-push-pop-ipv6.at
> @@ -344,7 +344,20 @@ AT_CHECK([ovs-ofctl add-flow br0 action=normal])
>  dnl Check Neighbour discovery.
>  AT_CHECK([ovs-vsctl -- set Interface p0 options:pcap=p0.pcap])
>  
> -AT_CHECK([ovs-appctl netdev-dummy/receive int-br 
> 'in_port(2),eth(src=aa:55:aa:55:00:00,dst=f8:bc:12:ff:ff:ff),eth_type(0x0800),ipv4(src=1.1.3.92,dst=1.1.3.88,proto=1,tos=0,ttl=64,frag=no),icmp(type=0,code=0)'])
> +dnl First trace should send only two ND lookups, for each destination IP
> +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "in_port(int-br),]dnl
> +  [eth(src=aa:55:aa:55:00:00,dst=f8:bc:12:ff:ff:ff),eth_type(0x0800),]dnl
> +  [ipv4(src=1.1.3.92,dst=1.1.3.88,proto=1,tos=0,ttl=64,frag=no),]dnl
> +  [icmp(type=0,code=0)"], [0], [stdout])
> +AT_CHECK([grep -q "sending ND request" stdout], [0])
> +
> +dnl Second trace should not send any ND lookups
> +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "in_port(int-br),]dnl
> +  [eth(src=aa:55:aa:55:00:00,dst=f8:bc:12:ff:ff:ff),eth_type(0x0800),]dnl
> +  [ipv4(src=1.1.3.92,dst=1.1.3.88,proto=1,tos=0,ttl=64,frag=no),]dnl
> +  [icmp(type=0,code=0)"], [0], [stdout])
> +AT_CHECK([grep -q "waiting on ND request" stdout], [0])
> +AT_CHECK([grep -qv "sending ND request" stdout], [0])
>  
>  dnl Wait for the two Neighbor Solicitation packets to be sent.
>  dnl Sometimes the system can be slow (e.g. under valgrind)
> @@ -577,8 +590,8 @@ 
> icmp,vlan_tci=0x0000,dl_src=be:b6:f4:e1:49:4a,dl_dst=fe:71:d8:83:72:4f,nw_src=30
>  AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port  5'], [0], [dnl
>    port  5: rx pkts=1, bytes=98, drop=?, errs=?, frame=?, over=?, crc=?
>  ])
> -AT_CHECK([ovs-appctl dpif/dump-flows int-br | grep 'in_port(6081)'], [0], 
> [dnl
> -recirc_id(0),tunnel(tun_id=0x7b,ipv6_src=2001:cafe::92,ipv6_dst=2001:cafe::88,geneve({class=0xffff,type=0x80,len=4,0xa/0xf}{class=0xffff,type=0,len=4}),flags(+key)),in_port(6081),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no),
>  packets:0, bytes:0, used:never, 
> actions:userspace(pid=0,controller(reason=1,dont_send=0,continuation=0,recirc_id=3,rule_cookie=0,controller_id=0,max_len=65535))
> +AT_CHECK([ovs-appctl dpif/dump-flows int-br | grep 'in_port(6081)' | 
> strip_recirc], [0], [dnl
> +recirc_id(<recirc>),tunnel(tun_id=0x7b,ipv6_src=2001:cafe::92,ipv6_dst=2001:cafe::88,geneve({class=0xffff,type=0x80,len=4,0xa/0xf}{class=0xffff,type=0,len=4}),flags(+key)),in_port(6081),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no),
>  packets:0, bytes:0, used:never, 
> actions:userspace(pid=0,controller(reason=1,dont_send=0,continuation=0,recirc_id=<recirc>,rule_cookie=0,controller_id=0,max_len=65535))
>  ])
>  
>  dnl Receive VXLAN with different MAC and verify that the neigh cache gets 
> updated
> diff --git a/tests/tunnel-push-pop.at b/tests/tunnel-push-pop.at
> index f22a37570..e7211149b 100644
> --- a/tests/tunnel-push-pop.at
> +++ b/tests/tunnel-push-pop.at
> @@ -256,7 +256,20 @@ AT_CHECK([ovs-appctl revalidator/wait])
>  dnl Check ARP request
>  AT_CHECK([ovs-vsctl -- set Interface p0 options:pcap=p0.pcap])
>  
> -AT_CHECK([ovs-appctl netdev-dummy/receive int-br 
> 'in_port(2),eth(src=aa:55:aa:55:00:00,dst=f8:bc:12:ff:ff:ff),eth_type(0x0800),ipv4(src=1.1.3.92,dst=1.1.3.88,proto=1,tos=0,ttl=64,frag=no),icmp(type=0,code=0)'])
> +dnl First trace should send only two ARP lookups, for each destination IP
> +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "in_port(int-br),]dnl
> +  [eth(src=aa:55:aa:55:00:00,dst=f8:bc:12:ff:ff:ff),eth_type(0x0800),]dnl
> +  [ipv4(src=1.1.3.92,dst=1.1.3.88,proto=1,tos=0,ttl=64,frag=no),]dnl
> +  [icmp(type=0,code=0)"], [0], [stdout])
> +AT_CHECK([grep -q "sending ARP request" stdout], [0])
> +
> +dnl Second trace should not send any ARP lookups
> +AT_CHECK([[ovs-appctl ofproto/trace ovs-dummy "in_port(int-br),]dnl
> +  [eth(src=aa:55:aa:55:00:00,dst=f8:bc:12:ff:ff:ff),eth_type(0x0800),]dnl
> +  [ipv4(src=1.1.3.92,dst=1.1.3.88,proto=1,tos=0,ttl=64,frag=no),]dnl
> +  [icmp(type=0,code=0)"]], [0], [stdout])
> +AT_CHECK([grep -q "waiting on ARP request" stdout], [0])
> +AT_CHECK([grep -qv "sending ARP request" stdout], [0])
>  
>  dnl Wait for the two ARP requests to be sent. Sometimes the system
>  dnl can be slow (e.g. under valgrind)

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to