On 5/15/23 12:46, Ales Musil wrote:
> There was a race within packet buffering that could
> result in first packt being dropped. It could happen
> under following conditions and topology:
> S1 == R1 == public == R2 == S2
> SNAT on R1 and DGP on port connecting R1 with public.
>
> 1) The GARP is sent for the DGP SNAT
> 2) The GARP is delayed on R2 because it's multicast
> 3) The MAC binding is added to SB
> 4) Some traffic that gets buffered on S2
> 5) An ARP is sent as consequence of the buffering
> 6) Response for the ARP is ignored on lflow level,
> because the MAC binding already exists
> 7) The buffered packet is never sent out and times out
>
> In order to prevent the race add additonal lookup for the
> buffered packets. When the packet is created do initial
> lookup right away as there is a chance that the MAC binding
> is already present in database. In any other case that would
> prevent the tracked loop to provide correct information do
> SB lookup, but only after certain timeout.
>
> Signed-off-by: Ales Musil <[email protected]>
> ---
Hi Ales,
> controller/mac-learn.c | 119 ++++++++++++++++++++++++++++++++++++++---
> controller/mac-learn.h | 20 +++++--
> controller/pinctrl.c | 57 +++++++++++---------
> 3 files changed, 159 insertions(+), 37 deletions(-)
>
> diff --git a/controller/mac-learn.c b/controller/mac-learn.c
> index 296a70242..d27dcf386 100644
> --- a/controller/mac-learn.c
> +++ b/controller/mac-learn.c
> @@ -23,12 +23,16 @@
> #include "lib/packets.h"
> #include "lib/smap.h"
> #include "lib/timeval.h"
> +#include "lport.h"
> +#include "ovn-sb-idl.h"
>
> VLOG_DEFINE_THIS_MODULE(mac_learn);
>
> -#define MAX_FDB_ENTRIES 1000
> -#define MAX_BUFFERED_PACKETS 1000
> -#define BUFFER_QUEUE_DEPTH 4
> +#define MAX_FDB_ENTRIES 1000
> +#define MAX_BUFFERED_PACKETS 1000
> +#define BUFFER_QUEUE_DEPTH 4
> +#define BUFFERED_PACKETS_TIMEOUT_MS 10000
> +#define BUFFERED_PACKETS_LOOKUP_MS 100
>
> static size_t keys_ip_hash(uint32_t dp_key, uint32_t port_key,
> struct in6_addr *ip);
> @@ -45,6 +49,13 @@ buffered_packets_find(struct buffered_packets_ctx *ctx,
> uint64_t dp_key,
> uint64_t port_key, struct in6_addr *ip, uint32_t hash);
> static void ovn_buffered_packets_remove(struct buffered_packets_ctx *ctx,
> struct buffered_packets *bp);
> +static void
> +buffered_packets_db_lookup(struct buffered_packets *bp,
> + struct ds *ip, struct eth_addr *mac,
> + struct ovsdb_idl_index *sbrec_pb_by_key,
> + struct ovsdb_idl_index *sbrec_dp_by_key,
> + struct ovsdb_idl_index *sbrec_pb_by_name,
> + struct ovsdb_idl_index *sbrec_mb_by_lport_ip);
>
> /* mac_binding functions. */
> void
> @@ -121,6 +132,23 @@ ovn_mac_binding_timed_out(const struct mac_binding *mb,
> long long now)
> return now >= mb->timeout_at_ms;
> }
>
> +const struct sbrec_mac_binding *
> +ovn_mac_binding_lookup(struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
> + const char *logical_port, const char *ip)
> +{
> + struct sbrec_mac_binding *mb =
> + sbrec_mac_binding_index_init_row(sbrec_mac_binding_by_lport_ip);
> + sbrec_mac_binding_index_set_logical_port(mb, logical_port);
> + sbrec_mac_binding_index_set_ip(mb, ip);
> +
> + const struct sbrec_mac_binding *retval =
> + sbrec_mac_binding_index_find(sbrec_mac_binding_by_lport_ip, mb);
> +
> + sbrec_mac_binding_index_destroy_row(mb);
> +
> + return retval;
> +}
> +
> /* fdb functions. */
> void
> ovn_fdb_init(struct hmap *fdbs)
> @@ -200,6 +228,7 @@ ovn_buffered_packets_add(struct buffered_packets_ctx
> *ctx, uint64_t dp_key,
> struct buffered_packets *bp;
>
> uint32_t hash = keys_ip_hash(dp_key, port_key, &ip);
> + long long now = time_msec();
>
> bp = buffered_packets_find(ctx, dp_key, port_key, &ip, hash);
> if (!bp) {
> @@ -212,10 +241,13 @@ ovn_buffered_packets_add(struct buffered_packets_ctx
> *ctx, uint64_t dp_key,
> bp->ip = ip;
> bp->dp_key = dp_key;
> bp->port_key = port_key;
> + /* Schedule the freshly added buffered packet to do lookup
> + * immediately. */
> + bp->lookup_at_ms = 0;
> ovs_list_init(&bp->queue);
> }
>
> - bp->expire = time_msec() + OVN_BUFFERED_PACKETS_TIMEOUT_MS;
> + bp->expire_at_ms = now + BUFFERED_PACKETS_TIMEOUT_MS;
>
> return bp;
> }
> @@ -235,15 +267,22 @@ ovn_buffered_packets_packet_data_enqueue(struct
> buffered_packets *bp,
>
> void
> ovn_buffered_packets_ctx_run(struct buffered_packets_ctx *ctx,
> - const struct mac_bindings_map *recent_mbs)
> + const struct mac_bindings_map *recent_mbs,
> + struct ovsdb_idl_index *sbrec_pb_by_key,
> + struct ovsdb_idl_index *sbrec_dp_by_key,
> + struct ovsdb_idl_index *sbrec_pb_by_name,
> + struct ovsdb_idl_index *sbrec_mb_by_lport_ip)
> {
> + struct ds ip = DS_EMPTY_INITIALIZER;
> long long now = time_msec();
> + ctx->next_lookup_at_ms = LLONG_MAX;
>
> struct buffered_packets *bp;
>
> HMAP_FOR_EACH_SAFE (bp, hmap_node, &ctx->buffered_packets) {
> + struct eth_addr mac = eth_addr_zero;
> /* Remove expired buffered packets. */
> - if (now > bp->expire) {
> + if (now > bp->expire_at_ms) {
I think this should be:
if (now >= bp->expire_at_ms) {
Otherwise there's a chance we don't do an immediate lookup for newly
buffered packets.
> ovn_buffered_packets_remove(ctx, bp);
> continue;
> }
> @@ -252,20 +291,36 @@ ovn_buffered_packets_ctx_run(struct
> buffered_packets_ctx *ctx,
> struct mac_binding *mb = mac_binding_find(recent_mbs, bp->dp_key,
> bp->port_key, &bp->ip,
> hash);
>
> - if (!mb) {
> + if (mb) {
> + mac = mb->mac;
> + } else if (now >= bp->lookup_at_ms) {
> + /* Check if we can do a full lookup. */
> + buffered_packets_db_lookup(bp, &ip, &mac, sbrec_pb_by_key,
> + sbrec_dp_by_key, sbrec_pb_by_name,
> + sbrec_mb_by_lport_ip);
> + /* Schedule next lookup even if we found the MAC address,
> + * if the address was found this struct will be deleted anyway.
> */
> + bp->lookup_at_ms = now + BUFFERED_PACKETS_LOOKUP_MS;
> + }
> +
> + if (eth_addr_is_zero(mac)) {
> + ctx->next_lookup_at_ms = MIN(ctx->next_lookup_at_ms,
> + bp->lookup_at_ms);
> continue;
> }
>
> struct packet_data *pd;
> LIST_FOR_EACH_POP (pd, node, &bp->queue) {
> struct eth_header *eth = dp_packet_data(pd->p);
> - eth->eth_dst = mb->mac;
> + eth->eth_dst = mac;
>
> ovs_list_push_back(&ctx->ready_packets_data, &pd->node);
> }
>
> ovn_buffered_packets_remove(ctx, bp);
> }
I think there's a chance for constantly waking up in case we have
continuous traffic for a destination that's not reachable. I think I'd
add this:
/* Don't retry lookup for at least BUFFERED_PACKETS_LOOKUP_MS. */
ctx->next_lookup_at_ms = MAX(ctx->next_lookup_at_ms,
now + BUFFERED_PACKETS_LOOKUP_MS);
If mac bindings are learned we wake up immediately anyway. If not,
there's no point to wake up earlier than 100ms from now.
This brings me to the question: do we actually need
ctx->next_lookup_at_ms? Isn't it good enough that we run this loop
every time a mac binding is learned? The main thread wakes up for the
mac binding SB change.
The rest of the code looks good to me.
Regards,
Dumitru
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev