On Tue, May 16, 2023 at 1:29 PM Dumitru Ceara <[email protected]> wrote:
> On 5/15/23 12:46, Ales Musil wrote: > > There was a race within packet buffering that could > > result in first packt being dropped. It could happen > > under following conditions and topology: > > S1 == R1 == public == R2 == S2 > > SNAT on R1 and DGP on port connecting R1 with public. > > > > 1) The GARP is sent for the DGP SNAT > > 2) The GARP is delayed on R2 because it's multicast > > 3) The MAC binding is added to SB > > 4) Some traffic that gets buffered on S2 > > 5) An ARP is sent as consequence of the buffering > > 6) Response for the ARP is ignored on lflow level, > > because the MAC binding already exists > > 7) The buffered packet is never sent out and times out > > > > In order to prevent the race add additonal lookup for the > > buffered packets. When the packet is created do initial > > lookup right away as there is a chance that the MAC binding > > is already present in database. In any other case that would > > prevent the tracked loop to provide correct information do > > SB lookup, but only after certain timeout. > > > > Signed-off-by: Ales Musil <[email protected]> > > --- > > Hi Ales, > > > controller/mac-learn.c | 119 ++++++++++++++++++++++++++++++++++++++--- > > controller/mac-learn.h | 20 +++++-- > > controller/pinctrl.c | 57 +++++++++++--------- > > 3 files changed, 159 insertions(+), 37 deletions(-) > > > > diff --git a/controller/mac-learn.c b/controller/mac-learn.c > > index 296a70242..d27dcf386 100644 > > --- a/controller/mac-learn.c > > +++ b/controller/mac-learn.c > > @@ -23,12 +23,16 @@ > > #include "lib/packets.h" > > #include "lib/smap.h" > > #include "lib/timeval.h" > > +#include "lport.h" > > +#include "ovn-sb-idl.h" > > > > VLOG_DEFINE_THIS_MODULE(mac_learn); > > > > -#define MAX_FDB_ENTRIES 1000 > > -#define MAX_BUFFERED_PACKETS 1000 > > -#define BUFFER_QUEUE_DEPTH 4 > > +#define MAX_FDB_ENTRIES 1000 > > +#define MAX_BUFFERED_PACKETS 1000 > > +#define BUFFER_QUEUE_DEPTH 4 > > +#define BUFFERED_PACKETS_TIMEOUT_MS 10000 > > +#define BUFFERED_PACKETS_LOOKUP_MS 100 > > > > static size_t keys_ip_hash(uint32_t dp_key, uint32_t port_key, > > struct in6_addr *ip); > > @@ -45,6 +49,13 @@ buffered_packets_find(struct buffered_packets_ctx > *ctx, uint64_t dp_key, > > uint64_t port_key, struct in6_addr *ip, uint32_t > hash); > > static void ovn_buffered_packets_remove(struct buffered_packets_ctx > *ctx, > > struct buffered_packets *bp); > > +static void > > +buffered_packets_db_lookup(struct buffered_packets *bp, > > + struct ds *ip, struct eth_addr *mac, > > + struct ovsdb_idl_index *sbrec_pb_by_key, > > + struct ovsdb_idl_index *sbrec_dp_by_key, > > + struct ovsdb_idl_index *sbrec_pb_by_name, > > + struct ovsdb_idl_index > *sbrec_mb_by_lport_ip); > > > > /* mac_binding functions. */ > > void > > @@ -121,6 +132,23 @@ ovn_mac_binding_timed_out(const struct mac_binding > *mb, long long now) > > return now >= mb->timeout_at_ms; > > } > > > > +const struct sbrec_mac_binding * > > +ovn_mac_binding_lookup(struct ovsdb_idl_index > *sbrec_mac_binding_by_lport_ip, > > + const char *logical_port, const char *ip) > > +{ > > + struct sbrec_mac_binding *mb = > > + sbrec_mac_binding_index_init_row(sbrec_mac_binding_by_lport_ip); > > + sbrec_mac_binding_index_set_logical_port(mb, logical_port); > > + sbrec_mac_binding_index_set_ip(mb, ip); > > + > > + const struct sbrec_mac_binding *retval = > > + sbrec_mac_binding_index_find(sbrec_mac_binding_by_lport_ip, mb); > > + > > + sbrec_mac_binding_index_destroy_row(mb); > > + > > + return retval; > > +} > > + > > /* fdb functions. */ > > void > > ovn_fdb_init(struct hmap *fdbs) > > @@ -200,6 +228,7 @@ ovn_buffered_packets_add(struct buffered_packets_ctx > *ctx, uint64_t dp_key, > > struct buffered_packets *bp; > > > > uint32_t hash = keys_ip_hash(dp_key, port_key, &ip); > > + long long now = time_msec(); > > > > bp = buffered_packets_find(ctx, dp_key, port_key, &ip, hash); > > if (!bp) { > > @@ -212,10 +241,13 @@ ovn_buffered_packets_add(struct > buffered_packets_ctx *ctx, uint64_t dp_key, > > bp->ip = ip; > > bp->dp_key = dp_key; > > bp->port_key = port_key; > > + /* Schedule the freshly added buffered packet to do lookup > > + * immediately. */ > > + bp->lookup_at_ms = 0; > > ovs_list_init(&bp->queue); > > } > > > > - bp->expire = time_msec() + OVN_BUFFERED_PACKETS_TIMEOUT_MS; > > + bp->expire_at_ms = now + BUFFERED_PACKETS_TIMEOUT_MS; > > > > return bp; > > } > > @@ -235,15 +267,22 @@ ovn_buffered_packets_packet_data_enqueue(struct > buffered_packets *bp, > > > > void > > ovn_buffered_packets_ctx_run(struct buffered_packets_ctx *ctx, > > - const struct mac_bindings_map *recent_mbs) > > + const struct mac_bindings_map *recent_mbs, > > + struct ovsdb_idl_index *sbrec_pb_by_key, > > + struct ovsdb_idl_index *sbrec_dp_by_key, > > + struct ovsdb_idl_index *sbrec_pb_by_name, > > + struct ovsdb_idl_index > *sbrec_mb_by_lport_ip) > > { > > + struct ds ip = DS_EMPTY_INITIALIZER; > > long long now = time_msec(); > > + ctx->next_lookup_at_ms = LLONG_MAX; > > > > struct buffered_packets *bp; > > > > HMAP_FOR_EACH_SAFE (bp, hmap_node, &ctx->buffered_packets) { > > + struct eth_addr mac = eth_addr_zero; > > /* Remove expired buffered packets. */ > > - if (now > bp->expire) { > > + if (now > bp->expire_at_ms) { > > I think this should be: > > if (now >= bp->expire_at_ms) { > > Otherwise there's a chance we don't do an immediate lookup for newly > buffered packets. > This is for the timeout when the buffered packets that did not find match should be removed. As discussed offline it can stay as is. > > > ovn_buffered_packets_remove(ctx, bp); > > continue; > > } > > @@ -252,20 +291,36 @@ ovn_buffered_packets_ctx_run(struct > buffered_packets_ctx *ctx, > > struct mac_binding *mb = mac_binding_find(recent_mbs, > bp->dp_key, > > bp->port_key, > &bp->ip, hash); > > > > - if (!mb) { > > + if (mb) { > > + mac = mb->mac; > > + } else if (now >= bp->lookup_at_ms) { > > + /* Check if we can do a full lookup. */ > > + buffered_packets_db_lookup(bp, &ip, &mac, sbrec_pb_by_key, > > + sbrec_dp_by_key, > sbrec_pb_by_name, > > + sbrec_mb_by_lport_ip); > > + /* Schedule next lookup even if we found the MAC address, > > + * if the address was found this struct will be deleted > anyway. */ > > + bp->lookup_at_ms = now + BUFFERED_PACKETS_LOOKUP_MS; > > + } > > + > > + if (eth_addr_is_zero(mac)) { > > + ctx->next_lookup_at_ms = MIN(ctx->next_lookup_at_ms, > > + bp->lookup_at_ms); > > continue; > > } > > > > struct packet_data *pd; > > LIST_FOR_EACH_POP (pd, node, &bp->queue) { > > struct eth_header *eth = dp_packet_data(pd->p); > > - eth->eth_dst = mb->mac; > > + eth->eth_dst = mac; > > > > ovs_list_push_back(&ctx->ready_packets_data, &pd->node); > > } > > > > ovn_buffered_packets_remove(ctx, bp); > > } > > I think there's a chance for constantly waking up in case we have > continuous traffic for a destination that's not reachable. I think I'd > add this: > > /* Don't retry lookup for at least BUFFERED_PACKETS_LOOKUP_MS. */ > ctx->next_lookup_at_ms = MAX(ctx->next_lookup_at_ms, > now + BUFFERED_PACKETS_LOOKUP_MS); > > If mac bindings are learned we wake up immediately anyway. If not, > there's no point to wake up earlier than 100ms from now. > > This brings me to the question: do we actually need > ctx->next_lookup_at_ms? Isn't it good enough that we run this loop > every time a mac binding is learned? The main thread wakes up for the > mac binding SB change. > > The rest of the code looks good to me. > > Regards, > Dumitru > > Yeah you are right, I'll remove the next_lookup_at_ms. Thanks, Ales -- Ales Musil Senior Software Engineer - OVN Core Red Hat EMEA <https://www.redhat.com> [email protected] IM: amusil <https://red.ht/sig> _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
