On Wed, Apr 26, 2023 at 12:55 PM Dumitru Ceara <[email protected]> wrote:

> On 3/28/23 10:12, Ales Musil wrote:
> > Simplify the logic behind packet buffering
> > and house those simplified helpers
> > in mac-learn.c.
> >
> > Signed-off-by: Ales Musil <[email protected]>
> > ---
> > v4: Rebase on top of current main.
> >     Split into three patches.
> > ---
>
> Hi, Ales,
>

> Thanks for working on this and sorry for not reviewing it earlier.
>
> I think it's indeed good that you split this in 3 patches but it
> would've probably been better to further separate this first patch into
> - first simplify the code without moving it to a different file
> - then, in a second patch, move the code to the new file.
>
> Functionally your change seems to be OK but I left a few more comments
> inline.
>
> >  controller/mac-learn.c | 171 ++++++++++++++++++---
> >  controller/mac-learn.h |  50 ++++++-
> >  controller/pinctrl.c   | 329 ++++++++++++-----------------------------
> >  3 files changed, 298 insertions(+), 252 deletions(-)
> >
> > diff --git a/controller/mac-learn.c b/controller/mac-learn.c
> > index a27607016..9d51693a7 100644
> > --- a/controller/mac-learn.c
> > +++ b/controller/mac-learn.c
> > @@ -18,10 +18,10 @@
> >  #include "mac-learn.h"
> >
> >  /* OpenvSwitch lib includes. */
> > +#include "dp-packet.h"
>
> No need for this, it's already included in mac-learn.h.
>
> >  #include "openvswitch/poll-loop.h"
> >  #include "openvswitch/vlog.h"
> >  #include "lib/packets.h"
> > -#include "lib/random.h"
> >  #include "lib/smap.h"
> >  #include "lib/timeval.h"
> >
> > @@ -29,11 +29,11 @@ VLOG_DEFINE_THIS_MODULE(mac_learn);
> >
> >  #define MAX_MAC_BINDINGS 1000
> >  #define MAX_FDB_ENTRIES  1000
> > -#define MAX_MAC_BINDING_DELAY_MSEC 50
> > +#define BUFFER_QUEUE_DEPTH 4
>
> Super nit: I would've aligned the values, e.g.:
>
> #define MAX_MAC_BINDINGS   1000
> #define MAX_FDB_ENTRIES    1000
> #define BUFFER_QUEUE_DEPTH    4
>
> >
> > -static size_t mac_binding_hash(uint32_t dp_key, uint32_t port_key,
> > -                               struct in6_addr *);
> > -static struct mac_binding *mac_binding_find(struct hmap *mac_bindings,
> > +static size_t keys_ip_hash(uint32_t dp_key, uint32_t port_key,
> > +                           struct in6_addr *);
> > +static struct mac_binding *mac_binding_find(const struct hmap
> *mac_bindings,
> >                                              uint32_t dp_key,
> >                                              uint32_t port_key,
> >                                              struct in6_addr *ip, size_t
> hash);
> > @@ -41,6 +41,12 @@ static size_t fdb_entry_hash(uint32_t dp_key, struct
> eth_addr *);
> >
> >  static struct fdb_entry *fdb_entry_find(struct hmap *fdbs, uint32_t
> dp_key,
> >                                          struct eth_addr *mac, size_t
> hash);
> > +static struct buffered_packets * buffered_packets_find(struct hmap
> *hmap,
>
> Nit: extra space after '*'.
>
> > +                                                       uint64_t dp_key,
> > +                                                       uint64_t
> port_key,
> > +                                                       struct in6_addr
> *ip,
> > +                                                       uint32_t hash);
> > +static void buffered_packets_destroy(struct buffered_packets *bp);
> >
> >  /* mac_binding functions. */
> >  void
> > @@ -62,24 +68,23 @@ ovn_mac_bindings_destroy(struct hmap *mac_bindings)
> >  struct mac_binding *
> >  ovn_mac_binding_add(struct hmap *mac_bindings, uint32_t dp_key,
> >                      uint32_t port_key, struct in6_addr *ip,
> > -                    struct eth_addr mac, bool is_unicast)
> > +                    struct eth_addr mac, uint32_t timestamp_offset,
> > +                    bool limited_capacity)
>
> This API really needs a comment now.  Without looking at the
> implementation it's really hard to guess 'limited_capacity' might mean.
>
> Thinking more about it, this API is actually not really OK.  Reading
> more into the patch I think what you actually need is a
> "mac_binding_map" that may have a maximum size.  I'd define a type like
> that and specify the max_size in its _create() function.  Then you can
> transparently take care of a potential limited capacity inside the
> ovn_mac_binding_add() API without cluttering the caller.
>
> >  {
> > -    uint32_t hash = mac_binding_hash(dp_key, port_key, ip);
> > +    uint32_t hash = keys_ip_hash(dp_key, port_key, ip);
> >
> >      struct mac_binding *mb =
> >          mac_binding_find(mac_bindings, dp_key, port_key, ip, hash);
> >      if (!mb) {
> > -        if (hmap_count(mac_bindings) >= MAX_MAC_BINDINGS) {
> > +        if (limited_capacity && hmap_count(mac_bindings) >=
> MAX_MAC_BINDINGS) {
> >              return NULL;
> >          }
> >
> > -        uint32_t delay = is_unicast
> > -            ? 0 : random_range(MAX_MAC_BINDING_DELAY_MSEC) + 1;
> >          mb = xmalloc(sizeof *mb);
> >          mb->dp_key = dp_key;
> >          mb->port_key = port_key;
> >          mb->ip = *ip;
> > -        mb->commit_at_ms = time_msec() + delay;
> > +        mb->expire = time_msec() + timestamp_offset;
> >          hmap_insert(mac_bindings, &mb->hmap_node, hash);
> >      }
> >      mb->mac = mac;
> > @@ -94,7 +99,7 @@ ovn_mac_binding_wait(struct hmap *mac_bindings)
> >      struct mac_binding *mb;
> >
> >      HMAP_FOR_EACH (mb, hmap_node, mac_bindings) {
> > -        poll_timer_wait_until(mb->commit_at_ms);
> > +        poll_timer_wait_until(mb->expire);
> >      }
> >  }
> >
> > @@ -106,9 +111,9 @@ ovn_mac_binding_remove(struct mac_binding *mb,
> struct hmap *mac_bindings)
> >  }
> >
> >  bool
> > -ovn_mac_binding_can_commit(const struct mac_binding *mb, long long now)
> > +ovn_mac_binding_is_expired(const struct mac_binding *mb, long long now)
> >  {
> > -    return now >= mb->commit_at_ms;
> > +    return now >= mb->expire;
> >  }
> >
> >  /* fdb functions. */
> > @@ -158,17 +163,126 @@ ovn_fdb_add(struct hmap *fdbs, uint32_t dp_key,
> struct eth_addr mac,
> >
> >  }
> >
> > +/* packet buffering functions */
> > +struct buffered_packets *
> > +ovn_buffered_packets_add(struct hmap *hmap, uint64_t dp_key, uint64_t
> port_key,
> > +                         struct in6_addr ip)
> > +{
> > +    struct buffered_packets *bp;
> > +
> > +    uint32_t hash = keys_ip_hash(dp_key, port_key, &ip);
> > +    bp = buffered_packets_find(hmap, dp_key, port_key, &ip, hash);
> > +    if (!bp) {
> > +        if (hmap_count(hmap) >= 1000) {
> > +            return NULL;
> > +        }
> > +
> > +        bp = xmalloc(sizeof *bp);
> > +        hmap_insert(hmap, &bp->hmap_node, hash);
> > +        bp->ip = ip;
> > +        bp->dp_key = dp_key;
> > +        bp->port_key = port_key;
> > +        ovs_list_init(&bp->queue);
> > +    }
> > +
> > +    bp->expire = time_msec() + OVN_BUFFERED_PACKETS_TIMEOUT;
> > +
> > +    return bp;
> > +}
> > +
> > +void
> > +ovn_buffered_packets_add_packet_data(struct buffered_packets *bp,
> > +                                     struct ofpbuf ofpacts,
> > +                                     struct dp_packet *packet)
>
> Please add a comment for this public API.
>
> > +{
> > +    struct packet_data *pd = xmalloc(sizeof *pd);
> > +
> > +    pd->ofpacts = ofpacts;
> > +    pd->p = packet;
> > +
> > +    if (ovs_list_size(&bp->queue) == BUFFER_QUEUE_DEPTH) {
> > +        struct packet_data *p =
> CONTAINER_OF(ovs_list_pop_front(&bp->queue),
> > +                                             struct packet_data, node);
> > +        ovn_packet_data_destroy(p);
> > +    }
> > +
> > +    ovs_list_push_back(&bp->queue, &pd->node);
> > +}
> > +
> > +void
> > +ovn_buffured_packets_prepare_ready(struct hmap *bp_hmap,
>
> Typo: 'buffured'.
>
> > +                                   const struct hmap
> *recent_mac_bindings,
> > +                                   struct ovs_list *ready_packet_data)
>
> Please add a comment for this public API.
>
> > +{
> > +    long long now = time_msec();
> > +
> > +    struct buffered_packets *bp;
> > +    HMAP_FOR_EACH_SAFE (bp, hmap_node, bp_hmap) {
> > +        if (now > bp->expire) {
> > +            hmap_remove(bp_hmap, &bp->hmap_node);
> > +            buffered_packets_destroy(bp);
> > +            continue;
> > +        }
> > +
> > +        uint32_t hash = keys_ip_hash(bp->dp_key, bp->port_key, &bp->ip);
> > +        struct mac_binding *mb = mac_binding_find(recent_mac_bindings,
>
> A better name would be 'bindings_to_skip' instead of
> 'recent_mac_bindings'.  It's irrelevant here why we need to skip them.
>
> OTOH, I wonder why you didn't already remove these in the caller, in
> run_buffered_binding().  I'd prefer if we didn't have to define APIs of
> the form "prepare <something> for all entries EXCEPT <entries>".
>
> Mentioning this change in the commit log would've been nice too.
>
> > +                                                  bp->dp_key,
> bp->port_key,
> > +                                                  &bp->ip, hash);
> > +        if (!mb) {
> > +            continue;
> > +        }
> > +
> > +        struct packet_data *pd;
> > +        LIST_FOR_EACH_POP (pd, node, &bp->queue) {
> > +            struct eth_header *eth = dp_packet_data(pd->p);
> > +            eth->eth_dst = mb->mac;
> > +
> > +            ovs_list_push_back(ready_packet_data, &pd->node);
> > +        }
> > +
> > +        hmap_remove(bp_hmap, &bp->hmap_node);
> > +        buffered_packets_destroy(bp);
> > +    }
> > +}
> > +
> > +void
> > +ovn_packet_data_destroy(struct packet_data *pd)
> > +{
> > +    dp_packet_delete(pd->p);
> > +    ofpbuf_uninit(&pd->ofpacts);
> > +    free(pd);
> > +}
> > +
> > +void
> > +ovn_packet_data_list_destroy(struct ovs_list *list)
> > +{
> > +    struct packet_data *pd;
> > +    LIST_FOR_EACH_POP (pd, node, list) {
> > +        ovn_packet_data_destroy(pd);
> > +    }
> > +}
> > +
> > +void
> > +ovn_buffered_packets_hmap_destroy(struct hmap *hmap)
> > +{
> > +    struct buffered_packets *bp;
> > +    HMAP_FOR_EACH_POP (bp, hmap_node, hmap) {
> > +        buffered_packets_destroy(bp);
> > +    }
> > +}
> > +
> > +
> >  /* mac_binding related static functions. */
> >
> >  static size_t
> > -mac_binding_hash(uint32_t dp_key, uint32_t port_key, struct in6_addr
> *ip)
> > +keys_ip_hash(uint32_t dp_key, uint32_t port_key, struct in6_addr *ip)
> >  {
> >      return hash_bytes(ip, sizeof *ip, hash_2words(dp_key, port_key));
> >  }
> >
> >  static struct mac_binding *
> > -mac_binding_find(struct hmap *mac_bindings, uint32_t dp_key,
> > -                   uint32_t port_key, struct in6_addr *ip, size_t hash)
> > +mac_binding_find(const struct hmap *mac_bindings, uint32_t dp_key,
> > +                 uint32_t port_key, struct in6_addr *ip, size_t hash)
> >  {
> >      struct mac_binding *mb;
> >      HMAP_FOR_EACH_WITH_HASH (mb, hmap_node, hash, mac_bindings) {
> > @@ -203,3 +317,26 @@ fdb_entry_find(struct hmap *fdbs, uint32_t dp_key,
> >
> >      return NULL;
> >  }
> > +
> > +/* packet buffering static functions. */
> > +static struct buffered_packets *
> > +buffered_packets_find(struct hmap *hmap, uint64_t dp_key, uint64_t
> port_key,
> > +                      struct in6_addr *ip, uint32_t hash)
> > +{
> > +    struct buffered_packets *mb;
> > +    HMAP_FOR_EACH_WITH_HASH (mb, hmap_node, hash, hmap) {
> > +        if (mb->dp_key == dp_key && mb->port_key == port_key &&
> > +            IN6_ARE_ADDR_EQUAL(&mb->ip, ip)) {
> > +            return mb;
> > +        }
> > +    }
> > +
> > +    return NULL;
> > +}
> > +
> > +static void
> > +buffered_packets_destroy(struct buffered_packets *bp)
> > +{
> > +    ovn_packet_data_list_destroy(&bp->queue);
> > +    free(bp);
> > +}
> > diff --git a/controller/mac-learn.h b/controller/mac-learn.h
> > index 57c50c58b..b6ff28650 100644
> > --- a/controller/mac-learn.h
> > +++ b/controller/mac-learn.h
> > @@ -19,7 +19,11 @@
> >
> >  #include <sys/types.h>
> >  #include <netinet/in.h>
> > +
> > +#include "dp-packet.h"
> >  #include "openvswitch/hmap.h"
> > +#include "openvswitch/list.h"
> > +#include "openvswitch/ofpbuf.h"
> >
> >  struct mac_binding {
> >      struct hmap_node hmap_node; /* In a hmap. */
> > @@ -32,20 +36,22 @@ struct mac_binding {
> >      /* Value. */
> >      struct eth_addr mac;
> >
> > -    /* Timestamp when to commit to SB. */
> > -    long long commit_at_ms;
> > +    /* Timestamp in ms indicating when the MAC binding should expire. */
> > +    long long expire;
> >  };
> >
> >  void ovn_mac_bindings_init(struct hmap *mac_bindings);
> >  void ovn_mac_bindings_destroy(struct hmap *mac_bindings);
> >  void ovn_mac_binding_wait(struct hmap *mac_bindings);
> >  void ovn_mac_binding_remove(struct mac_binding *mb, struct hmap
> *mac_bindings);
> > -bool ovn_mac_binding_can_commit(const struct mac_binding *mb, long long
> now);
> > +bool ovn_mac_binding_is_expired(const struct mac_binding *mb, long long
> now);
> >
> >  struct mac_binding *ovn_mac_binding_add(struct hmap *mac_bindings,
> >                                          uint32_t dp_key, uint32_t
> port_key,
> >                                          struct in6_addr *ip,
> > -                                        struct eth_addr mac, bool
> is_unicast);
> > +                                        struct eth_addr mac,
> > +                                        uint32_t timestamp_offset,
> > +                                        bool limited_capacity);
> >
> >
> >
> > @@ -68,4 +74,40 @@ struct fdb_entry *ovn_fdb_add(struct hmap *fdbs,
> >                                  uint32_t dp_key, struct eth_addr mac,
> >                                  uint32_t port_key);
> >
> > +
> > +struct packet_data {
> > +    struct ovs_list node;
> > +
> > +    struct ofpbuf ofpacts;
> > +    struct dp_packet *p;
> > +};
> > +
> > +struct buffered_packets {
> > +    struct hmap_node hmap_node;
> > +
> > +    struct in6_addr ip;
> > +    uint64_t dp_key;
> > +    uint64_t port_key;
> > +
> > +    struct ovs_list queue;
> > +
> > +    long long int expire;
>
> This used to be 'timestamp', i.e., the time when we buffered the packet.
>  It's not obvious to me why we need to change it.  In any case it should
> be mentioned in the commit log.
>
> > +};
> > +
> > +#define OVN_BUFFERED_PACKETS_TIMEOUT 10000
>
> I know the previous definition didn't do this but now that we're
> changing it I'd also include the unit in the name, e.g.:
> OVN_BUFFERED_PACKETS_TIMEOUT_MS.
>
> > +
> > +struct buffered_packets *ovn_buffered_packets_add(struct hmap *hmap,
> > +                                                  uint64_t dp_key,
> > +                                                  uint64_t port_key,
> > +                                                  struct in6_addr ip);
> > +void ovn_buffered_packets_add_packet_data(struct buffered_packets *bp,
> > +                                          struct ofpbuf ofpacts,
> > +                                          struct dp_packet *packet);
> > +void ovn_buffured_packets_prepare_ready(struct hmap *bp_hmap,
> > +                                        const struct hmap
> *recent_mac_bindings,
> > +                                        struct ovs_list
> *ready_packet_data);
> > +void ovn_packet_data_destroy(struct packet_data *pd);
> > +void ovn_packet_data_list_destroy(struct ovs_list *list);
> > +void ovn_buffered_packets_hmap_destroy(struct hmap *hmap);
> > +
>
> There are a few things I don't really like about this change:
>
> There's no ovn_packet_data_create() API but there is a
> ovn_packet_data_destroy() API.  In general, it's more readable IMO if we
> have both *_create() and *_destroy() APIs for a custom data type.
>
> It seems to me ovn_packet_data_list_destroy() is not really needed.
> What I think we should do is do all the "buffered_packets" and
> "packet_data" processing in a single place, probably in mac-learn.c.
>
> It's not great to use raw types like ovs_list and hmap.  It makes it
> hard to know what the real type is without looking at the
> implementation.  I think I'd just wrap the list and the map in some
> explicit structures, defined in mac-learn.h.  We then can rely on the
> compiler type checking to ensure we don't pass the wrong structure as
> argument.
>
> >  #endif /* OVN_MAC_LEARN_H */
> > diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> > index 795847729..fcc278c8c 100644
> > --- a/controller/pinctrl.c
> > +++ b/controller/pinctrl.c
> > @@ -178,8 +178,8 @@ struct pinctrl {
> >
> >  static struct pinctrl pinctrl;
> >
> > -static void init_buffered_packets_map(void);
> > -static void destroy_buffered_packets_map(void);
> > +static void init_buffered_packets_data(void);
> > +static void destroy_buffered_packets_data(void);
> >  static void
> >  run_buffered_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
> >                       const struct sbrec_mac_binding_table
> *mac_binding_table)
> > @@ -535,7 +535,7 @@ pinctrl_init(void)
> >      init_send_garps_rarps();
> >      init_ipv6_ras();
> >      init_ipv6_prefixd();
> > -    init_buffered_packets_map();
> > +    init_buffered_packets_data();
> >      init_activated_ports();
> >      init_event_table();
> >      ip_mcast_snoop_init();
> > @@ -1416,222 +1416,72 @@ prepare_ipv6_prefixd(struct ovsdb_idl_txn
> *ovnsb_idl_txn,
> >      }
> >  }
> >
> > -struct buffer_info {
> > -    struct ofpbuf ofpacts;
> > -    struct dp_packet *p;
> > -};
> > -
> > -#define BUFFER_QUEUE_DEPTH     4
> > -struct buffered_packets {
> > -    struct hmap_node hmap_node;
> > -    struct ovs_list list;
> > -
> > -    /* key */
> > -    struct in6_addr ip;
> > -    struct eth_addr ea;
> > -
> > -    uint64_t dp_key;
> > -    uint64_t port_key;
> > -
> > -    long long int timestamp;
> > -
> > -    struct buffer_info data[BUFFER_QUEUE_DEPTH];
> > -    uint32_t head, tail;
>
> Please mention this change in the commit log.  We're not using the
> circular buffer anymore.  Instead we're poping the front of the list if
> needed.  In theory this memory access pattern change can influence
> performance.  In practice, given that BUFFER_QUEUE_DEPTH is 4, I don't
> think it matters much though.
>
> > -};
> > -
> >  static struct hmap buffered_packets_map;
> > -static struct ovs_list buffered_mac_bindings;
> > +/* List of 'struct packet_data' that is ready to be sent. */
>
> I would add a comment for both 'buffered_packets_map' and
> 'ready_packets_data'.  Alternatively I'd add a newline before this comment.
>
> > +static struct ovs_list ready_packets_data;
> >
> >  static void
> > -init_buffered_packets_map(void)
> > +init_buffered_packets_data(void)
> >  {
> >      hmap_init(&buffered_packets_map);
> > -    ovs_list_init(&buffered_mac_bindings);
> > -}
> > -
> > -static void
> > -destroy_buffered_packets(struct buffered_packets *bp)
> > -{
> > -    struct buffer_info *bi;
> > -
> > -    while (bp->head != bp->tail) {
> > -        bi = &bp->data[bp->head];
> > -        dp_packet_delete(bi->p);
> > -        ofpbuf_uninit(&bi->ofpacts);
> > -
> > -        bp->head = (bp->head + 1) % BUFFER_QUEUE_DEPTH;
> > -    }
> > -}
> > -
> > -static void
> > -destroy_buffered_packets_map(void)
> > -{
> > -    struct buffered_packets *bp;
> > -    HMAP_FOR_EACH_SAFE (bp, hmap_node, &buffered_packets_map) {
> > -        destroy_buffered_packets(bp);
> > -        hmap_remove(&buffered_packets_map, &bp->hmap_node);
> > -        free(bp);
> > -    }
> > -    hmap_destroy(&buffered_packets_map);
> > -
> > -    LIST_FOR_EACH_POP (bp, list, &buffered_mac_bindings) {
> > -        destroy_buffered_packets(bp);
> > -        free(bp);
> > -    }
> > -}
> > -
> > -static void
> > -buffered_push_packet(struct buffered_packets *bp,
> > -                     struct dp_packet *packet,
> > -                     const struct match *md)
> > -{
> > -    uint32_t next = (bp->tail + 1) % BUFFER_QUEUE_DEPTH;
> > -    struct buffer_info *bi = &bp->data[bp->tail];
> > -
> > -    ofpbuf_init(&bi->ofpacts, 4096);
> > -
> > -    reload_metadata(&bi->ofpacts, md);
> > -    /* reload pkt_mark field */
> > -    const struct mf_field *pkt_mark_field = mf_from_id(MFF_PKT_MARK);
> > -    union mf_value pkt_mark_value;
> > -    mf_get_value(pkt_mark_field, &md->flow, &pkt_mark_value);
> > -    ofpact_put_set_field(&bi->ofpacts, pkt_mark_field, &pkt_mark_value,
> NULL);
> > -
> > -    struct ofpact_resubmit *resubmit =
> ofpact_put_RESUBMIT(&bi->ofpacts);
> > -    resubmit->in_port = OFPP_CONTROLLER;
> > -    resubmit->table_id = OFTABLE_REMOTE_OUTPUT;
> > -
> > -    bi->p = packet;
> > -
> > -    if (next == bp->head) {
> > -        bi = &bp->data[bp->head];
> > -        dp_packet_delete(bi->p);
> > -        ofpbuf_uninit(&bi->ofpacts);
> > -        bp->head = (bp->head + 1) % BUFFER_QUEUE_DEPTH;
> > -    }
> > -    bp->tail = next;
> > +    ovs_list_init(&ready_packets_data);
> >  }
> >
> >  static void
> > -buffered_send_packets(struct rconn *swconn, struct buffered_packets *bp,
> > -                      struct eth_addr *addr)
> > -{
> > -    enum ofp_version version = rconn_get_version(swconn);
> > -    enum ofputil_protocol proto =
> ofputil_protocol_from_ofp_version(version);
> > -
> > -    while (bp->head != bp->tail) {
> > -        struct buffer_info *bi = &bp->data[bp->head];
> > -        struct eth_header *eth = dp_packet_data(bi->p);
> > -
> > -        eth->eth_dst = *addr;
> > -        struct ofputil_packet_out po = {
> > -            .packet = dp_packet_data(bi->p),
> > -            .packet_len = dp_packet_size(bi->p),
> > -            .buffer_id = UINT32_MAX,
> > -            .ofpacts = bi->ofpacts.data,
> > -            .ofpacts_len = bi->ofpacts.size,
> > -        };
> > -        match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
> > -        queue_msg(swconn, ofputil_encode_packet_out(&po, proto));
> > -
> > -        ofpbuf_uninit(&bi->ofpacts);
> > -        dp_packet_delete(bi->p);
> > -
> > -        bp->head = (bp->head + 1) % BUFFER_QUEUE_DEPTH;
> > -    }
> > -}
> > -
> > -#define BUFFER_MAP_TIMEOUT   10000
> > -static void
> > -buffered_packets_map_gc(void)
> > -{
> > -    struct buffered_packets *cur_qp;
> > -    long long int now = time_msec();
> > -
> > -    HMAP_FOR_EACH_SAFE (cur_qp, hmap_node, &buffered_packets_map) {
> > -        if (now > cur_qp->timestamp + BUFFER_MAP_TIMEOUT) {
> > -            destroy_buffered_packets(cur_qp);
> > -            hmap_remove(&buffered_packets_map, &cur_qp->hmap_node);
> > -            free(cur_qp);
> > -        }
> > -    }
> > -}
> > -
> > -static uint32_t
> > -pinctrl_buffer_packet_hash(uint64_t dp_key, uint64_t port_key,
> > -                           const struct in6_addr *addr)
> > +destroy_buffered_packets_data(void)
> >  {
> > -    uint32_t hash = 0;
> > -    hash = hash_add64(hash, port_key);
> > -    hash = hash_add64(hash, dp_key);
> > -    hash = hash_bytes(addr, sizeof addr, hash);
> > -    return hash_finish(hash, 16);
> > -}
> > -
> > -static struct buffered_packets *
> > -pinctrl_find_buffered_packets(uint64_t dp_key, uint64_t port_key,
> > -                              const struct in6_addr *ip, uint32_t hash)
> > -{
> > -    struct buffered_packets *qp;
> > -    HMAP_FOR_EACH_WITH_HASH (qp, hmap_node, hash,
> &buffered_packets_map) {
> > -        if (qp->dp_key == dp_key && qp->port_key == port_key &&
> > -            IN6_ARE_ADDR_EQUAL(&qp->ip, ip)) {
> > -            return qp;
> > -        }
> > -    }
> > -    return NULL;
> > -}
> > -
> > -static struct buffered_packets *
> > -pinctrl_find_buffered_packets_with_hash(uint64_t dp_key, uint64_t
> port_key,
> > -                                        const struct in6_addr *ip)
> > -{
> > -    uint32_t hash = pinctrl_buffer_packet_hash(dp_key, port_key, ip);
> > -
> > -    return pinctrl_find_buffered_packets(dp_key, port_key, ip, hash);
> > +    ovn_buffered_packets_hmap_destroy(&buffered_packets_map);
> > +    ovn_packet_data_list_destroy(&ready_packets_data);
> >  }
> >
> >  /* Called with in the pinctrl_handler thread context. */
> > -static int
> > +static void
> >  pinctrl_handle_buffered_packets(struct dp_packet *pkt_in,
> >                                  const struct match *md, bool is_arp)
> >      OVS_REQUIRES(pinctrl_mutex)
> >  {
> > -    struct buffered_packets *bp;
> > +    struct in6_addr ip;
> > +    struct ofpbuf ofpacts;
> >      struct dp_packet *clone;
> > -    struct in6_addr addr;
> > +    struct buffered_packets *bp;
> >      uint64_t dp_key = ntohll(md->flow.metadata);
> >      uint64_t oport_key = md->flow.regs[MFF_LOG_OUTPORT - MFF_REG0];
> >
> >      if (is_arp) {
> > -        addr = in6_addr_mapped_ipv4(htonl(md->flow.regs[0]));
> > +        ip = in6_addr_mapped_ipv4(htonl(md->flow.regs[0]));
> >      } else {
> >          ovs_be128 ip6 = hton128(flow_get_xxreg(&md->flow, 0));
> > -        memcpy(&addr, &ip6, sizeof addr);
> > +        memcpy(&ip, &ip6, sizeof ip);
> >      }
> >
> > -    uint32_t hash = pinctrl_buffer_packet_hash(dp_key, oport_key,
> &addr);
> > -    bp = pinctrl_find_buffered_packets(dp_key, oport_key, &addr, hash);
> > +    bp = ovn_buffered_packets_add(&buffered_packets_map, dp_key,
> > +                                  oport_key, ip);
> >      if (!bp) {
> > -        if (hmap_count(&buffered_packets_map) >= 1000) {
> > -            COVERAGE_INC(pinctrl_drop_buffered_packets_map);
> > -            return -ENOMEM;
> > -        }
> > -
> > -        bp = xmalloc(sizeof *bp);
> > -        hmap_insert(&buffered_packets_map, &bp->hmap_node, hash);
> > -        bp->head = bp->tail = 0;
> > -        bp->ip = addr;
> > -        bp->dp_key = dp_key;
> > -        bp->port_key = oport_key;
> > +        COVERAGE_INC(pinctrl_drop_buffered_packets_map);
> > +        return;
> >      }
> > -    bp->timestamp = time_msec();
> > +
> >      /* clone the packet to send it later with correct L2 address */
> >      clone = dp_packet_clone_data(dp_packet_data(pkt_in),
> >                                   dp_packet_size(pkt_in));
> > -    buffered_push_packet(bp, clone, md);
> >
> > -    return 0;
> > +
> > +    ofpbuf_init(&ofpacts, 4096);
> > +    reload_metadata(&ofpacts, md);
> > +    /* reload pkt_mark field */
> > +    const struct mf_field *pkt_mark_field = mf_from_id(MFF_PKT_MARK);
> > +    union mf_value pkt_mark_value;
> > +    mf_get_value(pkt_mark_field, &md->flow, &pkt_mark_value);
> > +    ofpact_put_set_field(&ofpacts, pkt_mark_field, &pkt_mark_value,
> NULL);
> > +
> > +    struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts);
> > +    resubmit->in_port = OFPP_CONTROLLER;
> > +    resubmit->table_id = OFTABLE_REMOTE_OUTPUT;
> > +
> > +    ovn_buffered_packets_add_packet_data(bp, ofpacts, clone);
> > +
> > +    /* There is a chance that the MAC binding was already created. */
> > +    notify_pinctrl_main();
> >  }
> >
> >  /* Called with in the pinctrl_handler thread context. */
> > @@ -4154,7 +4004,7 @@ pinctrl_destroy(void)
> >      destroy_send_garps_rarps();
> >      destroy_ipv6_ras();
> >      destroy_ipv6_prefixd();
> > -    destroy_buffered_packets_map();
> > +    destroy_buffered_packets_data();
> >      destroy_activated_ports();
> >      event_table_destroy();
> >      destroy_put_mac_bindings();
> > @@ -4196,6 +4046,8 @@ destroy_put_mac_bindings(void)
> >      ovn_mac_bindings_destroy(&put_mac_bindings);
> >  }
> >
> > +#define MAX_MAC_BINDING_DELAY_MSEC 50
> > +
> >  /* Called with in the pinctrl_handler thread context. */
> >  static void
> >  pinctrl_handle_put_mac_binding(const struct flow *md,
> > @@ -4216,11 +4068,13 @@ pinctrl_handle_put_mac_binding(const struct flow
> *md,
> >
> >      /* If the ARP reply was unicast we should not delay it,
> >       * there won't be any race. */
> > -    bool is_unicast = !eth_addr_is_multicast(headers->dl_dst);
> > +    uint32_t delay = eth_addr_is_multicast(headers->dl_dst)
> > +                     ? random_range(MAX_MAC_BINDING_DELAY_MSEC) + 1
> > +                     : 0;
> >      struct mac_binding *mb = ovn_mac_binding_add(&put_mac_bindings,
> dp_key,
> >                                                   port_key, &ip_key,
> >                                                   headers->dl_src,
> > -                                                 is_unicast);
> > +                                                 delay, true);
> >      if (!mb) {
> >          COVERAGE_INC(pinctrl_drop_put_mac_binding);
> >          return;
> > @@ -4237,12 +4091,25 @@ static void
> >  send_mac_binding_buffered_pkts(struct rconn *swconn)
> >      OVS_REQUIRES(pinctrl_mutex)
> >  {
> > -    struct buffered_packets *bp;
> > -    LIST_FOR_EACH_POP (bp, list, &buffered_mac_bindings) {
> > -        buffered_send_packets(swconn, bp, &bp->ea);
> > -        free(bp);
> > +    enum ofp_version version = rconn_get_version(swconn);
> > +    enum ofputil_protocol proto =
> ofputil_protocol_from_ofp_version(version);
> > +
> > +    struct packet_data *pd;
> > +    LIST_FOR_EACH_POP (pd, node, &ready_packets_data) {
> > +        struct ofputil_packet_out po = {
> > +            .packet = dp_packet_data(pd->p),
> > +            .packet_len = dp_packet_size(pd->p),
> > +            .buffer_id = UINT32_MAX,
> > +            .ofpacts = pd->ofpacts.data,
> > +            .ofpacts_len = pd->ofpacts.size,
> > +        };
> > +        match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
> > +        queue_msg(swconn, ofputil_encode_packet_out(&po, proto));
> > +
> > +        ovn_packet_data_destroy(pd);
> >      }
> > -    ovs_list_init(&buffered_mac_bindings);
> > +
> > +    ovs_list_init(&ready_packets_data);
> >  }
> >
> >  static const struct sbrec_mac_binding *
> > @@ -4391,7 +4258,7 @@ run_put_mac_bindings(struct ovsdb_idl_txn
> *ovnsb_idl_txn,
> >
> >      struct mac_binding *mb;
> >      HMAP_FOR_EACH_SAFE (mb, hmap_node, &put_mac_bindings) {
> > -        if (ovn_mac_binding_can_commit(mb, now)) {
> > +        if (ovn_mac_binding_is_expired(mb, now)) {
>
> This is really confusing.  I think _is_expired() is not a good name
> choice.  Without looking at what run_put_mac_binding() my first reaction
> is to assume the mac binding is just flushed.  What actually happens is
> that the mac binding is committed to the SB.  Please revert back to the
> old name.  That also means we should probably rename the
> 'struct mac_binding' field back to 'commit_at_ms'.
>
> >              run_put_mac_binding(ovnsb_idl_txn,
> >                                  sbrec_datapath_binding_by_key,
> >                                  sbrec_port_binding_by_key,
> > @@ -4406,64 +4273,64 @@ run_buffered_binding(struct ovsdb_idl_index
> *sbrec_port_binding_by_name,
> >                       const struct sbrec_mac_binding_table
> *mac_binding_table)
> >      OVS_REQUIRES(pinctrl_mutex)
> >  {
> > -    bool notify = false;
> > -
> >      if (!hmap_count(&buffered_packets_map)) {
> >          return;
> >      }
> >
> > -    const struct sbrec_mac_binding *mb;
> > -    SBREC_MAC_BINDING_TABLE_FOR_EACH_TRACKED (mb, mac_binding_table) {
> > +    struct hmap recent_mbs;
> > +
> > +    hmap_init(&recent_mbs);
> > +
> > +    const struct sbrec_mac_binding *smb;
> > +    SBREC_MAC_BINDING_TABLE_FOR_EACH_TRACKED (smb, mac_binding_table) {
> >          const struct sbrec_port_binding *pb = lport_lookup_by_name(
> > -            sbrec_port_binding_by_name, mb->logical_port);
> > +            sbrec_port_binding_by_name, smb->logical_port);
> >          if (!pb || !pb->datapath) {
> >              continue;
> >          }
> >
> > +        struct eth_addr mac;
> >          struct in6_addr ip;
> >          ovs_be32 ip4;
> > -        if (ip_parse(mb->ip, &ip4)) {
> > +
> > +        if (ip_parse(smb->ip, &ip4)) {
> >              ip = in6_addr_mapped_ipv4(ip4);
> > -        } else if (!ipv6_parse(mb->ip, &ip)) {
> > +        } else if (!ipv6_parse(smb->ip, &ip)) {
> >              continue;
> >          }
> >
> > -        struct buffered_packets *bp =
> pinctrl_find_buffered_packets_with_hash(
> > -                pb->datapath->tunnel_key, pb->tunnel_key, &ip);
> > -        if (!bp) {
> > -            if (!smap_get(&pb->options, "chassis-redirect-port")) {
> > -                continue;
> > -            }
> > -
> > -            char *redirect_name = xasprintf("cr-%s", pb->logical_port);
> > -            pb = lport_lookup_by_name(sbrec_port_binding_by_name,
> > -                                      redirect_name);
> > -            free(redirect_name);
> > -
> > -            if (!pb || !pb->datapath || strcmp(pb->type,
> "chassisredirect")) {
> > -                continue;
> > -            }
> > -
> > -            bp = pinctrl_find_buffered_packets_with_hash(
> > -                    pb->datapath->tunnel_key, pb->tunnel_key, &ip);
> > +        if (!eth_addr_from_string(smb->mac, &mac)) {
> > +            continue;
> >          }
> >
> > -        if (!bp) {
> > +        ovn_mac_binding_add(&recent_mbs, smb->datapath->tunnel_key,
> > +                            pb->tunnel_key, &ip, mac, 0, false);
> > +
> > +        const char *redirect_port =
> > +            smap_get(&pb->options, "chassis-redirect-port");
> > +        if (!redirect_port) {
> >              continue;
> >          }
> >
> > -        if (!ovs_scan(mb->mac, ETH_ADDR_SCAN_FMT,
> > -                      ETH_ADDR_SCAN_ARGS(bp->ea))) {
> > +        pb = lport_lookup_by_name(sbrec_port_binding_by_name,
> redirect_port);
> > +        if (!pb || pb->datapath->tunnel_key !=
> smb->datapath->tunnel_key ||
> > +            strcmp(pb->type, "chassisredirect")) {
> >              continue;
> >          }
> >
> > -        hmap_remove(&buffered_packets_map, &bp->hmap_node);
> > -        ovs_list_push_back(&buffered_mac_bindings, &bp->list);
> > -        notify = true;
> > +        /* Add the same entry also for chassisredirect port as the
> buffered
> > +         * traffic might be buffered on the cr port. */
> > +        ovn_mac_binding_add(&recent_mbs, smb->datapath->tunnel_key,
> > +                            pb->tunnel_key, &ip, mac, 0, false);
> >      }
> > -    buffered_packets_map_gc();
> >
> > -    if (notify) {
> > +    ovn_buffured_packets_prepare_ready(&buffered_packets_map,
> > +                                       &recent_mbs,
> > +                                       &ready_packets_data);
> > +
> > +    ovn_mac_bindings_destroy(&recent_mbs);
> > +
> > +    if (!ovs_list_is_empty(&ready_packets_data)) {
> >          notify_pinctrl_handler();
> >      }
> >  }
> > @@ -6070,7 +5937,7 @@ may_inject_pkts(void)
> >              !shash_is_empty(&send_garp_rarp_data) ||
> >              ipv6_prefixd_should_inject() ||
> >              !ovs_list_is_empty(&mcast_query_list) ||
> > -            !ovs_list_is_empty(&buffered_mac_bindings) ||
> > +            !ovs_list_is_empty(&ready_packets_data) ||
> >              bfd_monitor_should_inject());
> >  }
> >
>
> After going through the whole diff I wonder if it wouldn't be possible
> to split this refactor/optimization into even smaller patches making it
> easier to review and ensure no regressions happen.  E.g., the
> mac_binding changes could probably be separated from the
> buffered_packets changes.
>
> Thanks,
> Dumitru
>
>
>
Hi Dumitru,

thank you for the review. Hopefully I was able to incorporate all
comments for this series in v5.

Thanks,
Ales

-- 

Ales Musil

Senior Software Engineer - OVN Core

Red Hat EMEA <https://www.redhat.com>

[email protected]    IM: amusil
<https://red.ht/sig>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to