On 4/28/23 11:42, Ales Musil wrote:
> There was a race within packet buffering that could
> result in the first packet being dropped. It could happen
> under the following conditions and topology:
>
> S1 == R1 == public == R2 == S2
>
> SNAT on R1 and DGP on the port connecting R1 with public.
>
> 1) A GARP is sent for the DGP SNAT address.
> 2) The GARP is delayed on R2 because it's multicast.
> 3) Some traffic gets buffered on S2.
> 4) An ARP request is sent as a consequence of the buffering.
> 5) The delayed MAC binding is added to the SB.
> 6) The response to the ARP is ignored because the MAC binding
>    already exists.
> 7) The buffered packet is never sent out and times out.
>
> To prevent this behavior, add an I-P node that keeps the recently
> added MAC bindings; those can be used to find the correct MAC
> binding for packet buffering.
>
> Signed-off-by: Ales Musil <amu...@redhat.com>
> ---

Hi Ales,

Thanks for the patch!
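If I'm reading the series right, the consumer side is the packet
buffering code: when it decides whether a buffered packet can finally
be sent, it can look the destination up in this map of recently added
bindings first, instead of relying only on an ARP/ND reply that will be
ignored because the SB row already exists.  Roughly along these lines
(just a sketch to check my understanding; the pinctrl.c/mac-learn.c
consumer hunks are not quoted below, and the helper name and its exact
signature are my guesses):

/* Illustration only, not patch code.  Assumes the mac-learn.h context;
 * ovn_mac_binding_find() stands for whatever lookup counterpart of
 * ovn_mac_binding_add() the series exposes. */
static bool
recent_mac_binding_usable(struct mac_bindings_map *recent_mbs,
                          int64_t dp_key, int64_t port_key,
                          struct in6_addr *ip, struct eth_addr *mac)
{
    struct mac_binding *mb = ovn_mac_binding_find(recent_mbs, dp_key,
                                                  port_key, ip);
    if (!mb) {
        /* No recently added binding: keep the packet buffered and wait
         * for the ARP/ND reply as before. */
        return false;
    }

    /* The binding raced with the buffering (e.g. it was installed from
     * a delayed GARP), so the ARP reply will be ignored; use the stored
     * MAC to flush the buffered packets right away. */
    *mac = mb->mac;
    return true;
}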
> controller/mac-learn.c      |   1 -
> controller/ovn-controller.c | 200 +++++++++++++++++++++++++++++++++++-
> controller/pinctrl.c        |  59 ++---------
> controller/pinctrl.h        |   6 +-
> 4 files changed, 206 insertions(+), 60 deletions(-)
>
> diff --git a/controller/mac-learn.c b/controller/mac-learn.c
> index a2d3417eb..697a24d77 100644
> --- a/controller/mac-learn.c
> +++ b/controller/mac-learn.c
> @@ -241,7 +241,6 @@ ovn_buffered_packets_ctx_run(struct buffered_packets_ctx *ctx,
>      long long now = time_msec();
>
>      struct buffered_packets *bp;
> -

Nit: Unrelated.

>      HMAP_FOR_EACH_SAFE (bp, hmap_node, &ctx->buffered_packets) {
>          /* Remove expired buffered packets. */
>          if (now > bp->expire) {
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index c094cb74d..a74728850 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -45,6 +45,7 @@
>  #include "lib/vswitch-idl.h"
>  #include "local_data.h"
>  #include "lport.h"
> +#include "mac-learn.h"
>  #include "memory.h"
>  #include "ofctrl.h"
>  #include "ofctrl-seqno.h"
> @@ -4303,6 +4304,177 @@ pflow_output_debug_handler(struct engine_node *node, void *data)
>      return true;
>  }
>
> +static void
> +recent_mac_bindings_add(struct mac_bindings_map *recent_mbs,
> +                        const struct sbrec_mac_binding *smb,
> +                        const struct hmap *local_datapaths,
> +                        struct ovsdb_idl_index *sbrec_port_binding_by_name)
> +{
> +    struct in6_addr ip;
> +    struct eth_addr mac;
> +
> +    if (!get_local_datapath(local_datapaths, smb->datapath->tunnel_key)) {
> +        return;
> +    }
> +
> +    const struct sbrec_port_binding *pb =
> +        lport_lookup_by_name(sbrec_port_binding_by_name, smb->logical_port);
> +    if (!pb) {
> +        return;
> +    }
> +
> +    if (!eth_addr_from_string(smb->mac, &mac)) {
> +        return;
> +    }
> +
> +    if (!ip_parse_mapped(smb->ip, &ip)) {
> +        return;
> +    }
> +
> +    /* Give the recent mac_binding entry bigger timeout than the buffered
> +     * packet in case the MAC binding is created earlier. */
> +    ovn_mac_binding_add(recent_mbs, smb->datapath->tunnel_key, pb->tunnel_key,
> +                        &ip, mac, 2 * OVN_BUFFERED_PACKETS_TIMEOUT);
> +
> +    const char *redirect_port =
> +        smap_get(&pb->options, "chassis-redirect-port");
> +    if (!redirect_port) {
> +        return;
> +    }
> +
> +    pb = lport_lookup_by_name(sbrec_port_binding_by_name, redirect_port);
> +    if (!pb || pb->datapath->tunnel_key != smb->datapath->tunnel_key ||
> +        strcmp(pb->type, "chassisredirect")) {
> +        return;
> +    }
> +
> +    /* Add the same entry also for chassisredirect port as the traffic might
> +     * be buffered on the cr port. */
> +    ovn_mac_binding_add(recent_mbs, smb->datapath->tunnel_key, pb->tunnel_key,
> +                        &ip, mac, 2 * OVN_BUFFERED_PACKETS_TIMEOUT);
> +}
> +
> +static void *
> +en_recent_mac_bindings_init(struct engine_node *node OVS_UNUSED,
> +                            struct engine_arg *arg OVS_UNUSED)
> +{
> +    struct mac_bindings_map *recent_mbs = xmalloc(sizeof *recent_mbs);
> +    ovn_mac_bindings_map_init(recent_mbs, 0);
> +
> +    return recent_mbs;
> +}
> +
> +static void
> +en_recent_mac_bindings_cleanup(void *data)
> +{
> +    struct mac_bindings_map *recent_mbs = data;
> +    ovn_mac_bindings_map_destroy(recent_mbs);
> +}
> +
> +static void
> +en_recent_mac_bindings_clear_tracked_data(void *data)
> +{
> +    long long now = time_msec();
> +    struct mac_bindings_map *recent_mbs = data;
> +
> +    struct mac_binding *mb;
> +    HMAP_FOR_EACH_SAFE (mb, hmap_node, &recent_mbs->map) {
> +        if (ovn_is_mac_binding_timestamp_past(mb, now)) {
> +            ovn_mac_binding_remove(mb, recent_mbs);
> +        }
> +    }
> +}
> +
> +static void
> +en_recent_mac_bindings_run(struct engine_node *node, void *data)
> +{
> +    struct mac_bindings_map *recent_mbs = data;
> +    struct ed_type_runtime_data *rt_data =
> +        engine_get_input_data("runtime_data", node);
> +    const struct sbrec_mac_binding_table *mac_binding_table =
> +        EN_OVSDB_GET(engine_get_input("SB_mac_binding", node));
> +    struct ovsdb_idl_index *sbrec_port_binding_by_name =
> +        engine_ovsdb_node_get_index(
> +            engine_get_input("SB_port_binding", node),
> +            "name");
> +
> +    const struct sbrec_mac_binding *smb;
> +    SBREC_MAC_BINDING_TABLE_FOR_EACH (smb, mac_binding_table) {
> +        recent_mac_bindings_add(recent_mbs, smb, &rt_data->local_datapaths,
> +                                sbrec_port_binding_by_name);
> +    }
> +}
> +

Do you foresee any performance issue due to iterating through all
(monitored) SB MAC bindings?  I understand we need that for I-P, but I
want to make sure we understand the potential cost.

With this change we'll have to pay that price every time the
recent_mac_bindings node is recomputed.  That's every full recompute,
but also some other cases (e.g., when a datapath is removed).  For
illustration, a rough sketch of a change-tracked handler that would
avoid the full scan follows below.

Thanks,
Dumitru
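The sketch: untested, the handler name is made up, and the tracked-row
iteration plus the engine_set_node_state() call are my assumptions about
how it could be wired rather than anything from this patch.  The body
just mirrors en_recent_mac_bindings_run(), but only visits MAC_Binding
rows that changed since the last engine run, so the full table walk is
left to actual recomputes of the node:

/* Hypothetical change-tracked handler for the SB_mac_binding input. */
static bool
recent_mac_bindings_sb_mac_binding_handler(struct engine_node *node,
                                           void *data)
{
    struct mac_bindings_map *recent_mbs = data;
    struct ed_type_runtime_data *rt_data =
        engine_get_input_data("runtime_data", node);
    const struct sbrec_mac_binding_table *mac_binding_table =
        EN_OVSDB_GET(engine_get_input("SB_mac_binding", node));
    struct ovsdb_idl_index *sbrec_port_binding_by_name =
        engine_ovsdb_node_get_index(
            engine_get_input("SB_port_binding", node),
            "name");

    const struct sbrec_mac_binding *smb;
    SBREC_MAC_BINDING_TABLE_FOR_EACH_TRACKED (smb, mac_binding_table) {
        if (sbrec_mac_binding_is_deleted(smb)) {
            /* Stale entries already age out via their timeout in
             * en_recent_mac_bindings_clear_tracked_data(). */
            continue;
        }
        recent_mac_bindings_add(recent_mbs, smb, &rt_data->local_datapaths,
                                sbrec_port_binding_by_name);
    }

    engine_set_node_state(node, EN_UPDATED);
    return true;
}

Hooked up as the handler for the SB_mac_binding input via
engine_add_input(), the per-iteration cost would then be proportional to
the number of changed rows instead of the size of the monitored table.
But maybe the full walk is cheap enough in practice; I mainly want to
make sure the cost was considered.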