On 5/15/23 12:46, Ales Musil wrote:
> There was a race within packet buffering that could
> result in first packt being dropped. It could happen
> under following conditions and topology:
> S1 == R1 == public == R2 == S2
> SNAT on R1 and DGP on port connecting R1 with public.
> 
> 1) The GARP is sent for the DGP SNAT
> 2) The GARP is delayed on R2 because it's multicast
> 3) The MAC binding is added to SB
> 4) Some traffic that gets buffered on S2
> 5) An ARP is sent as consequence of the buffering
> 6) Response for the ARP is ignored on lflow level,
> because the MAC binding already exists
> 7) The buffered packet is never sent out and times out
> 
> In order to prevent the race add additonal lookup for the
> buffered packets. When the packet is created do initial
> lookup right away as there is a chance that the MAC binding
> is already present in database. In any other case that would
> prevent the tracked loop to provide correct information do
> SB lookup, but only after certain timeout.
> 
> Signed-off-by: Ales Musil <[email protected]>
> ---

Hi Ales,

>  controller/mac-learn.c | 119 ++++++++++++++++++++++++++++++++++++++---
>  controller/mac-learn.h |  20 +++++--
>  controller/pinctrl.c   |  57 +++++++++++---------
>  3 files changed, 159 insertions(+), 37 deletions(-)
> 
> diff --git a/controller/mac-learn.c b/controller/mac-learn.c
> index 296a70242..d27dcf386 100644
> --- a/controller/mac-learn.c
> +++ b/controller/mac-learn.c
> @@ -23,12 +23,16 @@
>  #include "lib/packets.h"
>  #include "lib/smap.h"
>  #include "lib/timeval.h"
> +#include "lport.h"
> +#include "ovn-sb-idl.h"
>  
>  VLOG_DEFINE_THIS_MODULE(mac_learn);
>  
> -#define MAX_FDB_ENTRIES      1000
> -#define MAX_BUFFERED_PACKETS 1000
> -#define BUFFER_QUEUE_DEPTH   4
> +#define MAX_FDB_ENTRIES             1000
> +#define MAX_BUFFERED_PACKETS        1000
> +#define BUFFER_QUEUE_DEPTH          4
> +#define BUFFERED_PACKETS_TIMEOUT_MS 10000
> +#define BUFFERED_PACKETS_LOOKUP_MS  100
>  
>  static size_t keys_ip_hash(uint32_t dp_key, uint32_t port_key,
>                             struct in6_addr *ip);
> @@ -45,6 +49,13 @@ buffered_packets_find(struct buffered_packets_ctx *ctx, 
> uint64_t dp_key,
>                        uint64_t port_key, struct in6_addr *ip, uint32_t hash);
>  static void ovn_buffered_packets_remove(struct buffered_packets_ctx *ctx,
>                                          struct buffered_packets *bp);
> +static void
> +buffered_packets_db_lookup(struct buffered_packets *bp,
> +                           struct ds *ip, struct eth_addr *mac,
> +                           struct ovsdb_idl_index *sbrec_pb_by_key,
> +                           struct ovsdb_idl_index *sbrec_dp_by_key,
> +                           struct ovsdb_idl_index *sbrec_pb_by_name,
> +                           struct ovsdb_idl_index *sbrec_mb_by_lport_ip);
>  
>  /* mac_binding functions. */
>  void
> @@ -121,6 +132,23 @@ ovn_mac_binding_timed_out(const struct mac_binding *mb, 
> long long now)
>      return now >= mb->timeout_at_ms;
>  }
>  
> +const struct sbrec_mac_binding *
> +ovn_mac_binding_lookup(struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
> +                       const char *logical_port, const char *ip)
> +{
> +    struct sbrec_mac_binding *mb =
> +        sbrec_mac_binding_index_init_row(sbrec_mac_binding_by_lport_ip);
> +    sbrec_mac_binding_index_set_logical_port(mb, logical_port);
> +    sbrec_mac_binding_index_set_ip(mb, ip);
> +
> +    const struct sbrec_mac_binding *retval =
> +        sbrec_mac_binding_index_find(sbrec_mac_binding_by_lport_ip, mb);
> +
> +    sbrec_mac_binding_index_destroy_row(mb);
> +
> +    return retval;
> +}
> +
>  /* fdb functions. */
>  void
>  ovn_fdb_init(struct hmap *fdbs)
> @@ -200,6 +228,7 @@ ovn_buffered_packets_add(struct buffered_packets_ctx 
> *ctx, uint64_t dp_key,
>      struct buffered_packets *bp;
>  
>      uint32_t hash = keys_ip_hash(dp_key, port_key, &ip);
> +    long long now = time_msec();
>  
>      bp = buffered_packets_find(ctx, dp_key, port_key, &ip, hash);
>      if (!bp) {
> @@ -212,10 +241,13 @@ ovn_buffered_packets_add(struct buffered_packets_ctx 
> *ctx, uint64_t dp_key,
>          bp->ip = ip;
>          bp->dp_key = dp_key;
>          bp->port_key = port_key;
> +        /* Schedule the freshly added buffered packet to do lookup
> +         * immediately. */
> +        bp->lookup_at_ms = 0;
>          ovs_list_init(&bp->queue);
>      }
>  
> -    bp->expire = time_msec() + OVN_BUFFERED_PACKETS_TIMEOUT_MS;
> +    bp->expire_at_ms = now + BUFFERED_PACKETS_TIMEOUT_MS;
>  
>      return bp;
>  }
> @@ -235,15 +267,22 @@ ovn_buffered_packets_packet_data_enqueue(struct 
> buffered_packets *bp,
>  
>  void
>  ovn_buffered_packets_ctx_run(struct buffered_packets_ctx *ctx,
> -                             const struct mac_bindings_map *recent_mbs)
> +                             const struct mac_bindings_map *recent_mbs,
> +                             struct ovsdb_idl_index *sbrec_pb_by_key,
> +                             struct ovsdb_idl_index *sbrec_dp_by_key,
> +                             struct ovsdb_idl_index *sbrec_pb_by_name,
> +                             struct ovsdb_idl_index *sbrec_mb_by_lport_ip)
>  {
> +    struct ds ip = DS_EMPTY_INITIALIZER;
>      long long now = time_msec();
> +    ctx->next_lookup_at_ms = LLONG_MAX;
>  
>      struct buffered_packets *bp;
>  
>      HMAP_FOR_EACH_SAFE (bp, hmap_node, &ctx->buffered_packets) {
> +        struct eth_addr mac = eth_addr_zero;
>          /* Remove expired buffered packets. */
> -        if (now > bp->expire) {
> +        if (now > bp->expire_at_ms) {

I think this should be:

        if (now >= bp->expire_at_ms) {

Otherwise there's a chance we don't do an immediate lookup for newly
buffered packets.

>              ovn_buffered_packets_remove(ctx, bp);
>              continue;
>          }
> @@ -252,20 +291,36 @@ ovn_buffered_packets_ctx_run(struct 
> buffered_packets_ctx *ctx,
>          struct mac_binding *mb = mac_binding_find(recent_mbs, bp->dp_key,
>                                                    bp->port_key, &bp->ip, 
> hash);
>  
> -        if (!mb) {
> +        if (mb) {
> +            mac = mb->mac;
> +        } else if (now >= bp->lookup_at_ms) {
> +            /* Check if we can do a full lookup. */
> +            buffered_packets_db_lookup(bp, &ip, &mac, sbrec_pb_by_key,
> +                                       sbrec_dp_by_key, sbrec_pb_by_name,
> +                                       sbrec_mb_by_lport_ip);
> +            /* Schedule next lookup even if we found the MAC address,
> +             * if the address was found this struct will be deleted anyway. 
> */
> +            bp->lookup_at_ms = now + BUFFERED_PACKETS_LOOKUP_MS;
> +        }
> +
> +        if (eth_addr_is_zero(mac)) {
> +            ctx->next_lookup_at_ms = MIN(ctx->next_lookup_at_ms,
> +                                         bp->lookup_at_ms);
>              continue;
>          }
>  
>          struct packet_data *pd;
>          LIST_FOR_EACH_POP (pd, node, &bp->queue) {
>              struct eth_header *eth = dp_packet_data(pd->p);
> -            eth->eth_dst = mb->mac;
> +            eth->eth_dst = mac;
>  
>              ovs_list_push_back(&ctx->ready_packets_data, &pd->node);
>          }
>  
>          ovn_buffered_packets_remove(ctx, bp);
>      }

I think there's a chance for constantly waking up in case we have
continuous traffic for a destination that's not reachable.  I think I'd
add this:

    /* Don't retry lookup for at least BUFFERED_PACKETS_LOOKUP_MS. */
    ctx->next_lookup_at_ms = MAX(ctx->next_lookup_at_ms,
                                 now + BUFFERED_PACKETS_LOOKUP_MS);

If mac bindings are learned we wake up immediately anyway.  If not,
there's no point to wake up earlier than 100ms from now.

This brings me to the question: do we actually need
ctx->next_lookup_at_ms?  Isn't it good enough that we run this loop
every time a mac binding is learned?  The main thread wakes up for the
mac binding SB change.

The rest of the code looks good to me.

Regards,
Dumitru

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to