On Tue, May 16, 2023 at 1:29 PM Dumitru Ceara <[email protected]> wrote:

> On 5/15/23 12:46, Ales Musil wrote:
> > There was a race within packet buffering that could
> > result in first packt being dropped. It could happen
> > under following conditions and topology:
> > S1 == R1 == public == R2 == S2
> > SNAT on R1 and DGP on port connecting R1 with public.
> >
> > 1) The GARP is sent for the DGP SNAT
> > 2) The GARP is delayed on R2 because it's multicast
> > 3) The MAC binding is added to SB
> > 4) Some traffic that gets buffered on S2
> > 5) An ARP is sent as consequence of the buffering
> > 6) Response for the ARP is ignored on lflow level,
> > because the MAC binding already exists
> > 7) The buffered packet is never sent out and times out
> >
> > In order to prevent the race add additonal lookup for the
> > buffered packets. When the packet is created do initial
> > lookup right away as there is a chance that the MAC binding
> > is already present in database. In any other case that would
> > prevent the tracked loop to provide correct information do
> > SB lookup, but only after certain timeout.
> >
> > Signed-off-by: Ales Musil <[email protected]>
> > ---
>
> Hi Ales,
>
> >  controller/mac-learn.c | 119 ++++++++++++++++++++++++++++++++++++++---
> >  controller/mac-learn.h |  20 +++++--
> >  controller/pinctrl.c   |  57 +++++++++++---------
> >  3 files changed, 159 insertions(+), 37 deletions(-)
> >
> > diff --git a/controller/mac-learn.c b/controller/mac-learn.c
> > index 296a70242..d27dcf386 100644
> > --- a/controller/mac-learn.c
> > +++ b/controller/mac-learn.c
> > @@ -23,12 +23,16 @@
> >  #include "lib/packets.h"
> >  #include "lib/smap.h"
> >  #include "lib/timeval.h"
> > +#include "lport.h"
> > +#include "ovn-sb-idl.h"
> >
> >  VLOG_DEFINE_THIS_MODULE(mac_learn);
> >
> > -#define MAX_FDB_ENTRIES      1000
> > -#define MAX_BUFFERED_PACKETS 1000
> > -#define BUFFER_QUEUE_DEPTH   4
> > +#define MAX_FDB_ENTRIES             1000
> > +#define MAX_BUFFERED_PACKETS        1000
> > +#define BUFFER_QUEUE_DEPTH          4
> > +#define BUFFERED_PACKETS_TIMEOUT_MS 10000
> > +#define BUFFERED_PACKETS_LOOKUP_MS  100
> >
> >  static size_t keys_ip_hash(uint32_t dp_key, uint32_t port_key,
> >                             struct in6_addr *ip);
> > @@ -45,6 +49,13 @@ buffered_packets_find(struct buffered_packets_ctx
> *ctx, uint64_t dp_key,
> >                        uint64_t port_key, struct in6_addr *ip, uint32_t
> hash);
> >  static void ovn_buffered_packets_remove(struct buffered_packets_ctx
> *ctx,
> >                                          struct buffered_packets *bp);
> > +static void
> > +buffered_packets_db_lookup(struct buffered_packets *bp,
> > +                           struct ds *ip, struct eth_addr *mac,
> > +                           struct ovsdb_idl_index *sbrec_pb_by_key,
> > +                           struct ovsdb_idl_index *sbrec_dp_by_key,
> > +                           struct ovsdb_idl_index *sbrec_pb_by_name,
> > +                           struct ovsdb_idl_index
> *sbrec_mb_by_lport_ip);
> >
> >  /* mac_binding functions. */
> >  void
> > @@ -121,6 +132,23 @@ ovn_mac_binding_timed_out(const struct mac_binding
> *mb, long long now)
> >      return now >= mb->timeout_at_ms;
> >  }
> >
> > +const struct sbrec_mac_binding *
> > +ovn_mac_binding_lookup(struct ovsdb_idl_index
> *sbrec_mac_binding_by_lport_ip,
> > +                       const char *logical_port, const char *ip)
> > +{
> > +    struct sbrec_mac_binding *mb =
> > +        sbrec_mac_binding_index_init_row(sbrec_mac_binding_by_lport_ip);
> > +    sbrec_mac_binding_index_set_logical_port(mb, logical_port);
> > +    sbrec_mac_binding_index_set_ip(mb, ip);
> > +
> > +    const struct sbrec_mac_binding *retval =
> > +        sbrec_mac_binding_index_find(sbrec_mac_binding_by_lport_ip, mb);
> > +
> > +    sbrec_mac_binding_index_destroy_row(mb);
> > +
> > +    return retval;
> > +}
> > +
> >  /* fdb functions. */
> >  void
> >  ovn_fdb_init(struct hmap *fdbs)
> > @@ -200,6 +228,7 @@ ovn_buffered_packets_add(struct buffered_packets_ctx
> *ctx, uint64_t dp_key,
> >      struct buffered_packets *bp;
> >
> >      uint32_t hash = keys_ip_hash(dp_key, port_key, &ip);
> > +    long long now = time_msec();
> >
> >      bp = buffered_packets_find(ctx, dp_key, port_key, &ip, hash);
> >      if (!bp) {
> > @@ -212,10 +241,13 @@ ovn_buffered_packets_add(struct
> buffered_packets_ctx *ctx, uint64_t dp_key,
> >          bp->ip = ip;
> >          bp->dp_key = dp_key;
> >          bp->port_key = port_key;
> > +        /* Schedule the freshly added buffered packet to do lookup
> > +         * immediately. */
> > +        bp->lookup_at_ms = 0;
> >          ovs_list_init(&bp->queue);
> >      }
> >
> > -    bp->expire = time_msec() + OVN_BUFFERED_PACKETS_TIMEOUT_MS;
> > +    bp->expire_at_ms = now + BUFFERED_PACKETS_TIMEOUT_MS;
> >
> >      return bp;
> >  }
> > @@ -235,15 +267,22 @@ ovn_buffered_packets_packet_data_enqueue(struct
> buffered_packets *bp,
> >
> >  void
> >  ovn_buffered_packets_ctx_run(struct buffered_packets_ctx *ctx,
> > -                             const struct mac_bindings_map *recent_mbs)
> > +                             const struct mac_bindings_map *recent_mbs,
> > +                             struct ovsdb_idl_index *sbrec_pb_by_key,
> > +                             struct ovsdb_idl_index *sbrec_dp_by_key,
> > +                             struct ovsdb_idl_index *sbrec_pb_by_name,
> > +                             struct ovsdb_idl_index
> *sbrec_mb_by_lport_ip)
> >  {
> > +    struct ds ip = DS_EMPTY_INITIALIZER;
> >      long long now = time_msec();
> > +    ctx->next_lookup_at_ms = LLONG_MAX;
> >
> >      struct buffered_packets *bp;
> >
> >      HMAP_FOR_EACH_SAFE (bp, hmap_node, &ctx->buffered_packets) {
> > +        struct eth_addr mac = eth_addr_zero;
> >          /* Remove expired buffered packets. */
> > -        if (now > bp->expire) {
> > +        if (now > bp->expire_at_ms) {
>
> I think this should be:
>
>         if (now >= bp->expire_at_ms) {
>
> Otherwise there's a chance we don't do an immediate lookup for newly
> buffered packets.
>

This is for the timeout when the buffered packets that did not find match
should be removed.
As discussed offline it can stay as is.


>
> >              ovn_buffered_packets_remove(ctx, bp);
> >              continue;
> >          }
> > @@ -252,20 +291,36 @@ ovn_buffered_packets_ctx_run(struct
> buffered_packets_ctx *ctx,
> >          struct mac_binding *mb = mac_binding_find(recent_mbs,
> bp->dp_key,
> >                                                    bp->port_key,
> &bp->ip, hash);
> >
> > -        if (!mb) {
> > +        if (mb) {
> > +            mac = mb->mac;
> > +        } else if (now >= bp->lookup_at_ms) {
> > +            /* Check if we can do a full lookup. */
> > +            buffered_packets_db_lookup(bp, &ip, &mac, sbrec_pb_by_key,
> > +                                       sbrec_dp_by_key,
> sbrec_pb_by_name,
> > +                                       sbrec_mb_by_lport_ip);
> > +            /* Schedule next lookup even if we found the MAC address,
> > +             * if the address was found this struct will be deleted
> anyway. */
> > +            bp->lookup_at_ms = now + BUFFERED_PACKETS_LOOKUP_MS;
> > +        }
> > +
> > +        if (eth_addr_is_zero(mac)) {
> > +            ctx->next_lookup_at_ms = MIN(ctx->next_lookup_at_ms,
> > +                                         bp->lookup_at_ms);
> >              continue;
> >          }
> >
> >          struct packet_data *pd;
> >          LIST_FOR_EACH_POP (pd, node, &bp->queue) {
> >              struct eth_header *eth = dp_packet_data(pd->p);
> > -            eth->eth_dst = mb->mac;
> > +            eth->eth_dst = mac;
> >
> >              ovs_list_push_back(&ctx->ready_packets_data, &pd->node);
> >          }
> >
> >          ovn_buffered_packets_remove(ctx, bp);
> >      }
>
> I think there's a chance for constantly waking up in case we have
> continuous traffic for a destination that's not reachable.  I think I'd
> add this:
>
>     /* Don't retry lookup for at least BUFFERED_PACKETS_LOOKUP_MS. */
>     ctx->next_lookup_at_ms = MAX(ctx->next_lookup_at_ms,
>                                  now + BUFFERED_PACKETS_LOOKUP_MS);
>
> If mac bindings are learned we wake up immediately anyway.  If not,
> there's no point to wake up earlier than 100ms from now.
>
> This brings me to the question: do we actually need
> ctx->next_lookup_at_ms?  Isn't it good enough that we run this loop
> every time a mac binding is learned?  The main thread wakes up for the
> mac binding SB change.
>
> The rest of the code looks good to me.
>
> Regards,
> Dumitru
>
>
Yeah you are right, I'll remove the next_lookup_at_ms.

Thanks,
Ales


-- 

Ales Musil

Senior Software Engineer - OVN Core

Red Hat EMEA <https://www.redhat.com>

[email protected]    IM: amusil
<https://red.ht/sig>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to