On Fri, Feb 27, 2026 at 2:48 PM Felix Moebius via dev <
[email protected]> wrote:

> The mechanism for refreshing mac binding entries will send out an ARP/ND
> request using the logical port's mac address, regardless of whether the
> port is local to a given chassis.
>
> For gateway ports this will make a given LRP incorrectly appear to be
> claimed on the chassis that sends out the request, thereby confusing
> physical switches and other gateway chassis, which ultimately results in
> broken connectivity for that LRP.
> In particular, when the chassis that has actually claimed the LRP, sees
> the request from the other chassis with the mac address from its LRP,
> the OVS bridge that provides the external connectivity will incorrectly
> relearn the LRP's mac address on the port that is connected to the
> physical network which causes it to drop all further traffic destined
> to that LRP until egress traffic coming from the LRP causes it to
> move the mac back to the correct port or the FDB entry times out.
>
> Add a check to verify that a logical port from a mac binding is local to
> a given chassis before trying to refresh it.
>
> Fixes: 1e4d4409f391 ("controller: Send ARP/ND for stale mac_bindings
> entries.")
> Signed-off-by: Felix Moebius <[email protected]>
> ---
>

Hi Felix,

thank you for the fix. It makes sense I have just two comments
down below.


>  controller/mac-cache.c      | 13 ++++++++++++-
>  controller/mac-cache.h      |  3 +++
>  controller/ovn-controller.c |  2 +-
>  controller/statctrl.c       |  7 ++++---
>  controller/statctrl.h       |  1 +
>  5 files changed, 21 insertions(+), 5 deletions(-)
>
> diff --git a/controller/mac-cache.c b/controller/mac-cache.c
> index 4f859b7ea..0c8e5260b 100644
> --- a/controller/mac-cache.c
> +++ b/controller/mac-cache.c
> @@ -402,6 +402,7 @@ void
>  mac_binding_stats_run(
>          struct rconn *swconn OVS_UNUSED,
>          struct ovsdb_idl_index *sbrec_port_binding_by_name OVS_UNUSED,
> +        const struct sbrec_chassis *chassis OVS_UNUSED,
>          struct vector *stats_vec, uint64_t *req_delay, void *data)
>  {
>      struct mac_cache_data *cache_data = data;
> @@ -445,7 +446,7 @@ mac_binding_stats_run(
>
>      mac_cache_update_req_delay(&cache_data->thresholds, req_delay);
>      if (*req_delay) {
> -        VLOG_DBG("MAC binding statistics dalay: %"PRIu64, *req_delay);
> +        VLOG_DBG("MAC binding statistics delay: %"PRIu64, *req_delay);
>      }
>  }
>
> @@ -500,6 +501,7 @@ fdb_update_log(const char *action,
>  void
>  fdb_stats_run(struct rconn *swconn OVS_UNUSED,
>                struct ovsdb_idl_index *sbrec_port_binding_by_name
> OVS_UNUSED,
> +              const struct sbrec_chassis *chassis OVS_UNUSED,
>                struct vector *stats_vec,
>                uint64_t *req_delay, void *data)
>  {
> @@ -871,6 +873,7 @@ void
>  mac_binding_probe_stats_run(
>          struct rconn *swconn,
>          struct ovsdb_idl_index *sbrec_port_binding_by_name,
> +        const struct sbrec_chassis *chassis,
>          struct vector *stats_vec,
>          uint64_t *req_delay, void *data)
>  {
> @@ -892,6 +895,14 @@ mac_binding_probe_stats_run(
>          uint64_t since_updated_ms = timewall_now - mb->sbrec->timestamp;
>          const struct sbrec_mac_binding *sbrec = mb->sbrec;
>
> +        if (!lport_is_local(sbrec_port_binding_by_name, chassis,
> +                            sbrec->logical_port)) {

+            mac_binding_update_log("Not sending ARP/ND request for
> non-local",
> +                                   &mb->data, true, threshold,
> +                                   stats->idle_age_ms, since_updated_ms);
> +            continue;
> +        }
>

I wonder if we can actually do this filtering in "statctrl_run"?
It's a bit unfortunate that we have to pass yet another parameter
that would be used by a single function. WDYT?

Looking more at the code it might be actually beneficial to have the
port_binding ready before we call the run function.


+
>          if (stats->idle_age_ms > threshold->value) {
>              mac_binding_update_log("Not sending ARP/ND request for
> non-active",
>                                     &mb->data, true, threshold,
> diff --git a/controller/mac-cache.h b/controller/mac-cache.h
> index d7b5b9cd5..d05d964e6 100644
> --- a/controller/mac-cache.h
> +++ b/controller/mac-cache.h
> @@ -194,6 +194,7 @@ mac_binding_stats_process_flow_stats(struct vector
> *stats_vec,
>  void mac_binding_stats_run(
>          struct rconn *swconn OVS_UNUSED,
>          struct ovsdb_idl_index *sbrec_port_binding_by_name OVS_UNUSED,
> +        const struct sbrec_chassis *chassis OVS_UNUSED,
>          struct vector *stats_vec, uint64_t *req_delay, void *data);
>
>  /* FDB stat processing. */
> @@ -203,6 +204,7 @@ void fdb_stats_process_flow_stats(struct vector
> *stats_vec,
>  void fdb_stats_run(
>          struct rconn *swconn OVS_UNUSED,
>          struct ovsdb_idl_index *sbrec_port_binding_by_name OVS_UNUSED,
> +        const struct sbrec_chassis *chassis OVS_UNUSED,
>          struct vector *stats_vec, uint64_t *req_delay, void *data);
>
>  /* Packet buffering. */
> @@ -238,6 +240,7 @@ void mac_binding_probe_stats_process_flow_stats(
>  void mac_binding_probe_stats_run(
>          struct rconn *swconn,
>          struct ovsdb_idl_index *sbrec_port_binding_by_name,
> +        const struct sbrec_chassis *chassis,
>          struct vector *stats_vec, uint64_t *req_delay, void *data);
>
>  #endif /* controller/mac-cache.h */
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 4353f6094..f57618273 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -7843,7 +7843,7 @@ main(int argc, char *argv[])
>                      mac_cache_data = engine_get_data(&en_mac_cache);
>                      if (mac_cache_data) {
>                          statctrl_run(ovnsb_idl_txn,
> sbrec_port_binding_by_name,
> -                                     mac_cache_data);
> +                                     chassis, mac_cache_data);
>                      }
>
>                      ofctrl_seqno_update_create(
> diff --git a/controller/statctrl.c b/controller/statctrl.c
> index a76bac056..083696d46 100644
> --- a/controller/statctrl.c
> +++ b/controller/statctrl.c
> @@ -66,6 +66,7 @@ struct stats_node {
>       * This function runs in main thread locked behind mutex. */
>      void (*run)(struct rconn *swconn,
>                  struct ovsdb_idl_index *sbrec_port_binding_by_name,
> +                const struct sbrec_chassis *chassis,
>                  struct vector *stats,
>                  uint64_t *req_delay, void *data);
>      /* Name of the stats node. */
> @@ -175,6 +176,7 @@ statctrl_init(void)
>  void
>  statctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
>               struct ovsdb_idl_index *sbrec_port_binding_by_name,
> +             const struct sbrec_chassis *chassis,
>               struct mac_cache_data *mac_cache_data)
>  {
>      if (!ovnsb_idl_txn) {
> @@ -197,9 +199,8 @@ statctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
>          uint64_t prev_delay = node->request_delay;
>
>          stopwatch_start(node->name, time_msec());
> -        node->run(statctrl_ctx.swconn,
> -                  sbrec_port_binding_by_name, &node->stats,
> -                  &node->request_delay, node_data[i]);
> +        node->run(statctrl_ctx.swconn, sbrec_port_binding_by_name,
> chassis,
> +                  &node->stats, &node->request_delay, node_data[i]);
>          vector_clear(&node->stats);
>          if (vector_capacity(&node->stats) >=
> STATS_VEC_CAPACITY_THRESHOLD) {
>              VLOG_DBG("The statistics vector for node '%s' capacity "
> diff --git a/controller/statctrl.h b/controller/statctrl.h
> index 69a0b6f35..3e56b4a54 100644
> --- a/controller/statctrl.h
> +++ b/controller/statctrl.h
> @@ -21,6 +21,7 @@
>  void statctrl_init(void);
>  void statctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
>                    struct ovsdb_idl_index *sbrec_port_binding_by_name,
> +                  const struct sbrec_chassis *chassis,
>                    struct mac_cache_data *mac_cache_data);
>
>  void statctrl_update_swconn(const char *target, int probe_interval);
> --
> 2.52.0
>
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev



Regards,
Ales
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to