On 4/28/23 11:42, Ales Musil wrote:
> There was a race within packet buffering that could
> result in first packt being dropped. It could happen
> under following conditions and topology:
> S1 == R1 == public == R2 == S2
> SNAT on R1 and DGP on port connecting R1 with public.
> 
> 1) The GARP is sent for the GDP SNAT
> 2) The GARP is delayed on R2 because it's multicast
> 3) Some traffic that get's buffered on S2
> 4) An ARP is sent as consequence of the buffering
> 5) The delayed MAC binding is added to SB
> 6) Response for the ARP is ignored because the MAC binding
> already exists
> 7) The buffered packet is never sent out and times out
> 
> In order to prevent this behavior add I-P node that will
> keep the recently added mac bindings and those can be used
> to find the correct mac binding for packet buffering.
> 
> Signed-off-by: Ales Musil <amu...@redhat.com>
> ---

Hi Ales,

Thanks for the patch!

>  controller/mac-learn.c      |   1 -
>  controller/ovn-controller.c | 200 +++++++++++++++++++++++++++++++++++-
>  controller/pinctrl.c        |  59 ++---------
>  controller/pinctrl.h        |   6 +-
>  4 files changed, 206 insertions(+), 60 deletions(-)
> 
> diff --git a/controller/mac-learn.c b/controller/mac-learn.c
> index a2d3417eb..697a24d77 100644
> --- a/controller/mac-learn.c
> +++ b/controller/mac-learn.c
> @@ -241,7 +241,6 @@ ovn_buffered_packets_ctx_run(struct buffered_packets_ctx 
> *ctx,
>      long long now = time_msec();
>  
>      struct buffered_packets *bp;
> -

Nit: Unrelated.

>      HMAP_FOR_EACH_SAFE (bp, hmap_node, &ctx->buffered_packets) {
>          /* Remove expired buffered packets. */
>          if (now > bp->expire) {
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index c094cb74d..a74728850 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -45,6 +45,7 @@
>  #include "lib/vswitch-idl.h"
>  #include "local_data.h"
>  #include "lport.h"
> +#include "mac-learn.h"
>  #include "memory.h"
>  #include "ofctrl.h"
>  #include "ofctrl-seqno.h"
> @@ -4303,6 +4304,177 @@ pflow_output_debug_handler(struct engine_node *node, 
> void *data)
>      return true;
>  }
>  
> +static void
> +recent_mac_bindings_add(struct mac_bindings_map *recent_mbs,
> +                        const struct sbrec_mac_binding *smb,
> +                        const struct hmap *local_datapaths,
> +                        struct ovsdb_idl_index *sbrec_port_binding_by_name)
> +{
> +    struct in6_addr ip;
> +    struct eth_addr mac;
> +
> +    if (!get_local_datapath(local_datapaths, smb->datapath->tunnel_key)) {
> +        return;
> +    }
> +
> +    const struct sbrec_port_binding *pb =
> +        lport_lookup_by_name(sbrec_port_binding_by_name, smb->logical_port);
> +    if (!pb) {
> +        return;
> +    }
> +
> +    if (!eth_addr_from_string(smb->mac, &mac)) {
> +        return;
> +    }
> +
> +    if (!ip_parse_mapped(smb->ip, &ip)) {
> +        return;
> +    }
> +
> +    /* Give the recent mac_binding entry bigger timeout than the buffered
> +     * packet in case the MAC binding is created earlier. */
> +    ovn_mac_binding_add(recent_mbs, smb->datapath->tunnel_key, 
> pb->tunnel_key,
> +                        &ip, mac, 2 * OVN_BUFFERED_PACKETS_TIMEOUT);
> +
> +    const char *redirect_port =
> +        smap_get(&pb->options, "chassis-redirect-port");
> +    if (!redirect_port) {
> +        return;
> +    }
> +
> +    pb = lport_lookup_by_name(sbrec_port_binding_by_name, redirect_port);
> +    if (!pb || pb->datapath->tunnel_key != smb->datapath->tunnel_key ||
> +        strcmp(pb->type, "chassisredirect")) {
> +        return;
> +    }
> +
> +    /* Add the same entry also for chassisredirect port as the traffic might
> +     * be buffered on the cr port. */
> +    ovn_mac_binding_add(recent_mbs, smb->datapath->tunnel_key, 
> pb->tunnel_key,
> +                        &ip, mac, 2 * OVN_BUFFERED_PACKETS_TIMEOUT);
> +}
> +
> +static void *
> +en_recent_mac_bindings_init(struct engine_node *node OVS_UNUSED,
> +                            struct engine_arg *arg OVS_UNUSED)
> +{
> +    struct mac_bindings_map *recent_mbs = xmalloc(sizeof *recent_mbs);
> +    ovn_mac_bindings_map_init(recent_mbs, 0);
> +
> +    return recent_mbs;
> +}
> +
> +static void
> +en_recent_mac_bindings_cleanup(void *data)
> +{
> +    struct mac_bindings_map *recent_mbs = data;
> +    ovn_mac_bindings_map_destroy(recent_mbs);
> +}
> +
> +static void
> +en_recent_mac_bindings_clear_tracked_data(void *data)
> +{
> +    long long now = time_msec();
> +    struct mac_bindings_map *recent_mbs = data;
> +
> +    struct mac_binding *mb;
> +    HMAP_FOR_EACH_SAFE (mb, hmap_node, &recent_mbs->map) {
> +        if (ovn_is_mac_binding_timestamp_past(mb, now)) {
> +            ovn_mac_binding_remove(mb, recent_mbs);
> +        }
> +    }
> +}
> +
> +static void
> +en_recent_mac_bindings_run(struct engine_node *node, void *data)
> +{
> +    struct mac_bindings_map *recent_mbs = data;
> +    struct ed_type_runtime_data *rt_data =
> +        engine_get_input_data("runtime_data", node);
> +    const struct sbrec_mac_binding_table *mac_binding_table =
> +        EN_OVSDB_GET(engine_get_input("SB_mac_binding", node));
> +    struct ovsdb_idl_index *sbrec_port_binding_by_name =
> +        engine_ovsdb_node_get_index(
> +            engine_get_input("SB_port_binding", node),
> +            "name");
> +
> +    const struct sbrec_mac_binding *smb;
> +    SBREC_MAC_BINDING_TABLE_FOR_EACH (smb, mac_binding_table) {
> +        recent_mac_bindings_add(recent_mbs, smb, &rt_data->local_datapaths,
> +                                sbrec_port_binding_by_name);
> +    }
> +}
> +

Do you foresee any performance issue due to iterating through all
(monitored) SB mac bindings?  I understand we need that for I-P but I
want to make sure we understand the potential cost.

With this change we'll have to pay the performance price every time a
recompute of the recent_mac_binding node happens.   That's every time a
full recompute happens but also in some other cases (e.g., a datapath is
removed).

Thanks,
Dumitru

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to