On Wed, Jul 2, 2025 at 11:50 AM Xavier Simonart via dev < ovs-dev@openvswitch.org> wrote:
> When two gateways fight in ha mode to bind the router port, > there were cases in which one of the gateway almost always > immediately reclaimed the router port, as the grace time was > honored by the other gw. This is also the case since [1] for > the highest priority gw. > In such a case, the garp_rarp module did not detect the change in > the router port binding and was not resetting the garp "count", > potentially resulting in no new garps. > > Also, with a fight time of 500 msec there is not enough time for gw > to send garp, so increase the grace time to two seconds. > > [1] controller: Do not postpone claim for highest priority chassis. > > Fixes: 8ab8570e21ff ("pinctrl: Fix missing garp.") > Reported-at: https://issues.redhat.com/browse/FDP-1418 > Signed-off-by: Xavier Simonart <xsimo...@redhat.com> > --- > Hi Xavier, thank you for the patch, this solution doesn't feel right. As demonstrated by the multinode test it solves the problem but in quite a peculiar way. We need to reset the timers when we detect that interface is claimed again by the current chassis, so the complete removal of the garp_rarp node feels like complete overkill to me, we can really just reset the timers if it exists. But there might be slightly different approach please see down below. controller/binding.c | 2 +- > controller/garp_rarp.c | 43 ++++++++++++++++++++++++++++++++----- > controller/garp_rarp.h | 2 ++ > controller/if-status.c | 17 +++++++++++++++ > controller/if-status.h | 1 + > controller/ovn-controller.c | 10 +++++++-- > 6 files changed, 67 insertions(+), 8 deletions(-) > > diff --git a/controller/binding.c b/controller/binding.c > index 947340203..d483887e1 100644 > --- a/controller/binding.c > +++ b/controller/binding.c > @@ -49,7 +49,7 @@ VLOG_DEFINE_THIS_MODULE(binding); > > #define OVN_QOS_TYPE "linux-htb" > > -#define CLAIM_TIME_THRESHOLD_MS 500 > +#define CLAIM_TIME_THRESHOLD_MS 2000 > > struct claimed_port { > long long int last_claimed; > diff --git a/controller/garp_rarp.c b/controller/garp_rarp.c > index 551d8303f..c461570c9 100644 > --- a/controller/garp_rarp.c > +++ b/controller/garp_rarp.c > @@ -24,6 +24,7 @@ > #include "ovn/lex.h" > #include "garp_rarp.h" > #include "ovn-sb-idl.h" > +#include "if-status.h" > > VLOG_DEFINE_THIS_MODULE(garp_rarp); > static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); > @@ -33,6 +34,10 @@ static struct vlog_rate_limit rl = > VLOG_RATE_LIMIT_INIT(5, 20); > static bool garp_rarp_data_has_changed = false; > static struct garp_rarp_data garp_rarp_data; > > +static void > +garp_rarp_node_del(const struct eth_addr ea, ovs_be32 ip, > + uint32_t dp_key, uint32_t port_key); > + > /* Get localnet vifs, local l3gw ports and ofport for localnet patch > ports. */ > static void > get_localnet_vifs_l3gwports( > @@ -144,7 +149,8 @@ consider_nat_address(struct ovsdb_idl_index > *sbrec_port_binding_by_name, > const struct sbrec_chassis *chassis, > struct shash *nat_addresses, > struct sset *non_local_lports, > - struct sset *local_lports) > + struct sset *local_lports, > + struct if_status_mgr *mgr) > { > struct lport_addresses *laddrs = xmalloc(sizeof *laddrs); > char *lport = NULL; > @@ -165,6 +171,16 @@ consider_nat_address(struct ovsdb_idl_index > *sbrec_port_binding_by_name, > free(lport); > return; > } else { > + const struct sbrec_port_binding *cr_pb = > + lport_lookup_by_name(sbrec_port_binding_by_name, lport); > + if (if_status_reclaimed(mgr, cr_pb->logical_port)) { > + /* lport just got claimed. make sure to reset garp count. > */ > + for (size_t i = 0; i < laddrs->n_ipv4_addrs; i++) { > + garp_rarp_node_del(laddrs->ea, > laddrs->ipv4_addrs[i].addr, > + pb->datapath->tunnel_key, > + pb->tunnel_key); > + } > + } > sset_add(local_lports, lport); > } > } > @@ -191,7 +207,8 @@ get_nat_addresses_and_keys(struct ovsdb_idl_index > *sbrec_port_binding_by_name, > const struct sbrec_chassis *chassis, > struct shash *nat_addresses, > struct sset *non_local_lports, > - struct sset *local_lports) > + struct sset *local_lports, > + struct if_status_mgr *mgr) > { > const char *gw_port; > SSET_FOR_EACH (gw_port, local_l3gw_ports) { > @@ -209,7 +226,8 @@ get_nat_addresses_and_keys(struct ovsdb_idl_index > *sbrec_port_binding_by_name, > nat_address_keys, chassis, > nat_addresses, > non_local_lports, > - local_lports); > + local_lports, > + mgr); > } > } else { > /* Continue to support options:nat-addresses for version > @@ -222,7 +240,8 @@ get_nat_addresses_and_keys(struct ovsdb_idl_index > *sbrec_port_binding_by_name, > nat_address_keys, chassis, > nat_addresses, > non_local_lports, > - local_lports); > + local_lports, > + mgr); > } > } > } > @@ -271,6 +290,19 @@ garp_rarp_lookup(const struct eth_addr ea, ovs_be32 > ipv4, uint32_t dp_key, > return NULL; > } > > +static void > +garp_rarp_node_del(const struct eth_addr ea, ovs_be32 ip, > + uint32_t dp_key, uint32_t port_key) > +{ > + struct garp_rarp_node *grn = garp_rarp_lookup(ea, ip, dp_key, > port_key); > + if (grn) { > + cmap_remove(&garp_rarp_data.data, &grn->cmap_node, > + garp_rarp_node_hash_struct(grn)); > + garp_rarp_node_free(grn); > nit: We cannot free that node right away. 'ovsrcu_postpone(garp_rarp_node_free, grn);' > + garp_rarp_data_has_changed = true; > + } > +} > + > static void > garp_rarp_node_add(const struct eth_addr ea, ovs_be32 ip, > uint32_t dp_key, uint32_t port_key) > @@ -433,7 +465,8 @@ garp_rarp_run(struct garp_rarp_ctx_in *r_ctx_in) > &nat_ip_keys, &local_l3gw_ports, > r_ctx_in->chassis, &nat_addresses, > &r_ctx_in->data->non_local_lports, > - &r_ctx_in->data->local_lports); > + &r_ctx_in->data->local_lports, > + r_ctx_in->mgr); > > /* Update send_garp_rarp_data. */ > const char *iface_id; > diff --git a/controller/garp_rarp.h b/controller/garp_rarp.h > index 38f19e1c0..2eccab37d 100644 > --- a/controller/garp_rarp.h > +++ b/controller/garp_rarp.h > @@ -21,6 +21,7 @@ > #include "cmap.h" > #include "sset.h" > #include "openvswitch/types.h" > +#include "if-status.h" > > /* Contains a single mac and ip address that should be announced. */ > struct garp_rarp_node { > @@ -57,6 +58,7 @@ struct garp_rarp_ctx_in { > const struct hmap *local_datapaths; > const struct sset *active_tunnels; > struct ed_type_garp_rarp *data; > + struct if_status_mgr *mgr; > }; > > struct ed_type_garp_rarp { > diff --git a/controller/if-status.c b/controller/if-status.c > index 2358b4b3d..7ad1c85a6 100644 > --- a/controller/if-status.c > +++ b/controller/if-status.c > @@ -191,6 +191,8 @@ struct ovs_iface { > uint16_t mtu; /* Extracted from OVS interface.mtu field. */ > enum can_bind bind_type;/* CAN_BIND_AS_MAIN or CAN_BIND_AS_ADDITIONAL > */ > bool is_vif; /* Vifs, container or virtual ports */ > + bool reclaimed; /* Iface, which was claimed by another > chassis, > + * is claimed */ > }; > > static uint64_t ifaces_usage; > @@ -314,6 +316,9 @@ if_status_mgr_claim_iface(struct if_status_mgr *mgr, > sizeof(iface->pb_uuid)); > } > if (!sb_readonly) { > + if (pb->chassis && pb->chassis != chassis_rec) { > + iface->reclaimed = true; > + } > if (bind_type == CAN_BIND_AS_MAIN) { > set_pb_chassis_in_sbrec(pb, chassis_rec, true); > } else if (bind_type == CAN_BIND_AS_ADDITIONAL) { > @@ -346,6 +351,18 @@ if_status_mgr_iface_is_present(struct if_status_mgr > *mgr, const char *iface_id) > return !!shash_find_data(&mgr->ifaces, iface_id); > } > > +bool > +if_status_reclaimed(struct if_status_mgr *mgr, const char *iface_id) > +{ > + struct ovs_iface *iface = shash_find_data(&mgr->ifaces, iface_id); > + if (!iface) { > + return false; > + } > + bool reclaimed = iface->reclaimed; > + iface->reclaimed = false; > + return reclaimed; > +} > + > void > if_status_mgr_release_iface(struct if_status_mgr *mgr, const char > *iface_id) > { > diff --git a/controller/if-status.h b/controller/if-status.h > index d4f972355..e2592a20f 100644 > --- a/controller/if-status.h > +++ b/controller/if-status.h > @@ -67,5 +67,6 @@ bool if_status_mgr_iface_update(const struct > if_status_mgr *mgr, > const struct ovsrec_interface *iface_rec); > bool if_status_is_port_claimed(const struct if_status_mgr *mgr, > const char *iface_id); > +bool if_status_reclaimed(struct if_status_mgr *mgr, const char *iface_id); > > # endif /* controller/if-status.h */ > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c > index 4cbf1d858..8350fd381 100644 > --- a/controller/ovn-controller.c > +++ b/controller/ovn-controller.c > @@ -5455,6 +5455,8 @@ static enum engine_node_state > en_garp_rarp_run(struct engine_node *node, void *data_) > { > struct ed_type_garp_rarp *data = data_; > + struct controller_engine_ctx *ctrl_ctx = > + engine_get_context()->client_ctx; > > const struct ovsrec_open_vswitch_table *ovs_table = > EN_OVSDB_GET(engine_get_input("OVS_open_vswitch", node)); > @@ -5504,6 +5506,7 @@ en_garp_rarp_run(struct engine_node *node, void > *data_) > .active_tunnels = &rt_data->active_tunnels, > .local_datapaths = &rt_data->local_datapaths, > .data = data, > + .mgr = ctrl_ctx->if_mgr, > }; > > garp_rarp_run(&r_ctx_in); > @@ -5554,6 +5557,7 @@ garp_rarp_sb_port_binding_handler(struct engine_node > *node, > engine_ovsdb_node_get_index( > engine_get_input("SB_port_binding", node), > "name"); > + struct controller_engine_ctx *ctrl_ctx = > engine_get_context()->client_ctx; > > const struct sbrec_port_binding *pb; > SBREC_PORT_BINDING_TABLE_FOR_EACH_TRACKED (pb, port_binding_table) { > @@ -5577,8 +5581,10 @@ garp_rarp_sb_port_binding_handler(struct > engine_node *node, > } > > if (sset_contains(&data->local_lports, pb->logical_port) && > - !lport_is_chassis_resident(sbrec_port_binding_by_name, > chassis, > - pb->logical_port)) { > + (!lport_is_chassis_resident(sbrec_port_binding_by_name, > chassis, > + pb->logical_port) || > + (!strcmp(pb->type, "chassisredirect") && > + if_status_reclaimed(ctrl_ctx->if_mgr, pb->logical_port)))) { > > We should receive the notification from idl when the PB gets bound on another chassis which will be followed by notification that it was once again bound by this chassis. As discussed offline incremental processing might not help in case when recompute is triggered by runtime_data, but we could still utilize the if_status_mgr by having node that would track only recently claimed DGPs. So PB would be be incrementally processed, in case of recompute we would take a look at the recently claimed DGPs. WDYT? /* XXX: actually handle this incrementally. */ > return EN_UNHANDLED; > } > -- > 2.47.1 > > _______________________________________________ > dev mailing list > d...@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev