On Wed, Mar 4, 2026 at 10:33 AM Felix Moebius via dev <
[email protected]> wrote:
> The mechanism for refreshing mac binding entries will send out an ARP/ND
> request using the logical port's mac address, regardless of whether the
> port is local to a given chassis.
>
> For gateway ports this will make a given LRP incorrectly appear to be
> claimed on the chassis that sends out the request, thereby confusing
> physical switches and other gateway chassis, which ultimately results in
> broken connectivity for that LRP.
> In particular, when the chassis that has actually claimed the LRP, sees
> the request from the other chassis with the mac address from its LRP,
> the OVS bridge that provides the external connectivity will incorrectly
> relearn the LRP's mac address on the port that is connected to the
> physical network which causes it to drop all further traffic destined
> to that LRP until egress traffic coming from the LRP causes it to
> move the mac back to the correct port or the FDB entry times out.
>
> Add a check to verify that a logical port from a mac binding is local to
> a given chassis before trying to refresh it. Also refactor the statctrl
> handler part a bit such that we don't keep passing more and more
> parameters to the handlers.
>
> Fixes: 1e4d4409f391 ("controller: Send ARP/ND for stale mac_bindings
> entries.")
> Signed-off-by: Felix Moebius <[email protected]>
> ---
> v2:
> - Removed handler specific parameters in statctrl
> - Avoided double lookup for port binding + moved the locality check to
> a later point to avoid unnecessary lookups
> ---
>
Hi Felix,
thank you for v2.Could you please split the refactor and the
fix to separate commits in the next iteration? It would also be
nice to have a test that showcases the problematic scenario.
Some small comments down below.
> controller/lport.c | 19 ++++++++++-----
> controller/lport.h | 3 +++
> controller/mac-cache.c | 47 ++++++++++++++++++++-----------------
> controller/mac-cache.h | 24 +++++++++----------
> controller/ovn-controller.c | 2 +-
> controller/statctrl.c | 23 ++++++++++--------
> controller/statctrl.h | 1 +
> 7 files changed, 68 insertions(+), 51 deletions(-)
>
> diff --git a/controller/lport.c b/controller/lport.c
> index b30bcd398..9cb7308ce 100644
> --- a/controller/lport.c
> +++ b/controller/lport.c
> @@ -89,13 +89,10 @@ lport_is_chassis_resident(struct ovsdb_idl_index
> *sbrec_port_binding_by_name,
> }
>
> bool
> -lport_is_local(struct ovsdb_idl_index *sbrec_port_binding_by_name,
> - const struct sbrec_chassis *chassis,
> - const char *port_name)
> +lport_pb_is_local(struct ovsdb_idl_index *sbrec_port_binding_by_name,
> + const struct sbrec_chassis *chassis,
> + const struct sbrec_port_binding *pb)
> {
> - const struct sbrec_port_binding *pb = lport_lookup_by_name(
> - sbrec_port_binding_by_name, port_name);
> -
> if (!pb) {
> return false;
> }
> @@ -115,6 +112,16 @@ lport_is_local(struct ovsdb_idl_index
> *sbrec_port_binding_by_name,
> return lport_pb_is_chassis_resident(chassis, cr_pb);
> }
>
> +bool
> +lport_is_local(struct ovsdb_idl_index *sbrec_port_binding_by_name,
> + const struct sbrec_chassis *chassis,
> + const char *port_name)
> +{
> + const struct sbrec_port_binding *pb = lport_lookup_by_name(
> + sbrec_port_binding_by_name, port_name);
> + return lport_pb_is_local(sbrec_port_binding_by_name, chassis, pb);
> +}
> +
> const struct sbrec_port_binding *
> lport_get_peer(const struct sbrec_port_binding *pb,
> struct ovsdb_idl_index *sbrec_port_binding_by_name)
> diff --git a/controller/lport.h b/controller/lport.h
> index 6d48301d2..4465c4242 100644
> --- a/controller/lport.h
> +++ b/controller/lport.h
> @@ -69,6 +69,9 @@ bool lport_pb_is_chassis_resident(const struct
> sbrec_chassis *chassis,
> bool lport_is_local(struct ovsdb_idl_index *sbrec_port_binding_by_name,
> const struct sbrec_chassis *chassis,
> const char *port_name);
> +bool lport_pb_is_local(struct ovsdb_idl_index *sbrec_port_binding_by_name,
> + const struct sbrec_chassis *chassis,
> + const struct sbrec_port_binding *pb);
> const struct sbrec_port_binding *lport_get_peer(
> const struct sbrec_port_binding *,
> struct ovsdb_idl_index *sbrec_port_binding_by_name);
> diff --git a/controller/mac-cache.c b/controller/mac-cache.c
> index 4f859b7ea..98d635df1 100644
> --- a/controller/mac-cache.c
> +++ b/controller/mac-cache.c
> @@ -399,10 +399,8 @@ mac_binding_update_log(const char *action,
> }
>
> void
> -mac_binding_stats_run(
> - struct rconn *swconn OVS_UNUSED,
> - struct ovsdb_idl_index *sbrec_port_binding_by_name OVS_UNUSED,
> - struct vector *stats_vec, uint64_t *req_delay, void *data)
> +mac_binding_stats_run(struct vector *stats_vec, uint64_t *req_delay,
> + void *data)
> {
> struct mac_cache_data *cache_data = data;
> long long timewall_now = time_wall_msec();
> @@ -445,7 +443,7 @@ mac_binding_stats_run(
>
> mac_cache_update_req_delay(&cache_data->thresholds, req_delay);
> if (*req_delay) {
> - VLOG_DBG("MAC binding statistics dalay: %"PRIu64, *req_delay);
> + VLOG_DBG("MAC binding statistics delay: %"PRIu64, *req_delay);
> }
> }
>
> @@ -498,10 +496,7 @@ fdb_update_log(const char *action,
> }
>
> void
> -fdb_stats_run(struct rconn *swconn OVS_UNUSED,
> - struct ovsdb_idl_index *sbrec_port_binding_by_name
> OVS_UNUSED,
> - struct vector *stats_vec,
> - uint64_t *req_delay, void *data)
> +fdb_stats_run(struct vector *stats_vec, uint64_t *req_delay, void *data)
> {
> struct mac_cache_data *cache_data = data;
> long long timewall_now = time_wall_msec();
> @@ -543,7 +538,7 @@ fdb_stats_run(struct rconn *swconn OVS_UNUSED,
>
> mac_cache_update_req_delay(&cache_data->thresholds, req_delay);
> if (*req_delay) {
> - VLOG_DBG("FDB entry statistics dalay: %"PRIu64, *req_delay);
> + VLOG_DBG("FDB entry statistics delay: %"PRIu64, *req_delay);
> }
> }
>
> @@ -868,19 +863,17 @@ mac_binding_probe_stats_process_flow_stats(
> }
>
> void
> -mac_binding_probe_stats_run(
> - struct rconn *swconn,
> - struct ovsdb_idl_index *sbrec_port_binding_by_name,
> - struct vector *stats_vec,
> - uint64_t *req_delay, void *data)
> +mac_binding_probe_stats_run(struct vector *stats_vec, uint64_t *req_delay,
> + void *data)
> {
> long long timewall_now = time_wall_msec();
> - struct mac_cache_data *cache_data = data;
> + struct mac_binding_probe_data *probe_data = data;
>
> struct mac_cache_stats *stats;
> VECTOR_FOR_EACH_PTR (stats_vec, stats) {
> - struct mac_binding *mb =
> mac_binding_find(&cache_data->mac_bindings,
> - &stats->data.mb);
> + struct mac_binding *mb =
> + mac_binding_find(&probe_data->cache_data->mac_bindings,
> + &stats->data.mb);
>
nit: We can add "struct mac_cache_data *cache_data =
probe_data->cache_data;"
above so you don't have to change all those finds.
if (!mb) {
> mac_binding_update_log("Probe: not found in the cache:",
> &stats->data.mb, false, NULL, 0, 0);
> @@ -888,7 +881,8 @@ mac_binding_probe_stats_run(
> }
>
> struct mac_cache_threshold *threshold =
> - mac_cache_threshold_find(cache_data, mb->data.dp_key);
> + mac_cache_threshold_find(probe_data->cache_data,
> + mb->data.dp_key);
> uint64_t since_updated_ms = timewall_now - mb->sbrec->timestamp;
> const struct sbrec_mac_binding *sbrec = mb->sbrec;
>
> @@ -908,12 +902,21 @@ mac_binding_probe_stats_run(
> }
>
> const struct sbrec_port_binding *pb =
> - lport_lookup_by_name(sbrec_port_binding_by_name,
> + lport_lookup_by_name(probe_data->sbrec_port_binding_by_name,
> sbrec->logical_port);
> +
>
nit: Unrelated newline.
> if (!pb) {
> continue;
> }
>
> + if (!lport_pb_is_local(probe_data->sbrec_port_binding_by_name,
> + probe_data->chassis, pb)) {
+ mac_binding_update_log("Not sending ARP/ND request for
> non-local",
> + &mb->data, true, threshold,
> + stats->idle_age_ms, since_updated_ms);
> + continue;
> + }
> +
> struct lport_addresses laddr;
> if (!extract_lsp_addresses(pb->mac[0], &laddr)) {
> continue;
> @@ -930,7 +933,7 @@ mac_binding_probe_stats_run(
> &mb->data, true, threshold,
> stats->idle_age_ms, since_updated_ms);
>
> - send_self_originated_neigh_packet(swconn,
> + send_self_originated_neigh_packet(probe_data->swconn,
> sbrec->datapath->tunnel_key,
> pb->tunnel_key, laddr.ea,
> &local, &mb->data.ip,
> @@ -940,7 +943,7 @@ mac_binding_probe_stats_run(
> destroy_lport_addresses(&laddr);
> }
>
> - mac_cache_update_req_delay(&cache_data->thresholds, req_delay);
> + mac_cache_update_req_delay(&probe_data->cache_data->thresholds,
> req_delay);
> if (*req_delay) {
> VLOG_DBG("MAC probe binding statistics delay: %"PRIu64,
> *req_delay);
> }
> diff --git a/controller/mac-cache.h b/controller/mac-cache.h
> index d7b5b9cd5..7edb129d7 100644
> --- a/controller/mac-cache.h
> +++ b/controller/mac-cache.h
> @@ -63,6 +63,13 @@ struct mac_binding_data {
> struct eth_addr mac;
> };
>
> +struct mac_binding_probe_data {
> + struct mac_cache_data *cache_data;
> + struct rconn *swconn;
> + struct ovsdb_idl_index *sbrec_port_binding_by_name;
> + const struct sbrec_chassis *chassis;
> +};
> +
> struct mac_binding {
> struct hmap_node hmap_node;
> /* Common data to identify MAC binding. */
> @@ -191,19 +198,14 @@ void
> mac_binding_stats_process_flow_stats(struct vector *stats_vec,
> struct ofputil_flow_stats
> *ofp_stats);
>
> -void mac_binding_stats_run(
> - struct rconn *swconn OVS_UNUSED,
> - struct ovsdb_idl_index *sbrec_port_binding_by_name OVS_UNUSED,
> - struct vector *stats_vec, uint64_t *req_delay, void *data);
> +void mac_binding_stats_run(struct vector *stats_vec, uint64_t *req_delay,
> + void *data);
>
> /* FDB stat processing. */
> void fdb_stats_process_flow_stats(struct vector *stats_vec,
> struct ofputil_flow_stats *ofp_stats);
>
> -void fdb_stats_run(
> - struct rconn *swconn OVS_UNUSED,
> - struct ovsdb_idl_index *sbrec_port_binding_by_name OVS_UNUSED,
> - struct vector *stats_vec, uint64_t *req_delay, void *data);
> +void fdb_stats_run(struct vector *stats_vec, uint64_t *req_delay, void
> *data);
>
> /* Packet buffering. */
> void bp_packet_data_destroy(struct bp_packet_data *pd);
> @@ -235,9 +237,7 @@ void mac_binding_probe_stats_process_flow_stats(
> struct vector *stats_vec,
> struct ofputil_flow_stats *ofp_stats);
>
> -void mac_binding_probe_stats_run(
> - struct rconn *swconn,
> - struct ovsdb_idl_index *sbrec_port_binding_by_name,
> - struct vector *stats_vec, uint64_t *req_delay, void *data);
> +void mac_binding_probe_stats_run(struct vector *stats_vec, uint64_t
> *req_delay,
> + void *data);
>
> #endif /* controller/mac-cache.h */
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 4353f6094..f57618273 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -7843,7 +7843,7 @@ main(int argc, char *argv[])
> mac_cache_data = engine_get_data(&en_mac_cache);
> if (mac_cache_data) {
> statctrl_run(ovnsb_idl_txn,
> sbrec_port_binding_by_name,
> - mac_cache_data);
> + chassis, mac_cache_data);
> }
>
> ofctrl_seqno_update_create(
> diff --git a/controller/statctrl.c b/controller/statctrl.c
> index a76bac056..9f3e4bc39 100644
> --- a/controller/statctrl.c
> +++ b/controller/statctrl.c
> @@ -64,10 +64,7 @@ struct stats_node {
> struct ofputil_flow_stats *ofp_stats);
> /* Function to process the parsed stats.
> * This function runs in main thread locked behind mutex. */
> - void (*run)(struct rconn *swconn,
> - struct ovsdb_idl_index *sbrec_port_binding_by_name,
> - struct vector *stats,
> - uint64_t *req_delay, void *data);
> + void (*run)(struct vector *stats, uint64_t *req_delay, void *data);
> /* Name of the stats node. */
> const char *name;
> };
> @@ -175,16 +172,24 @@ statctrl_init(void)
> void
> statctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> struct ovsdb_idl_index *sbrec_port_binding_by_name,
> + const struct sbrec_chassis *chassis,
> struct mac_cache_data *mac_cache_data)
> {
> if (!ovnsb_idl_txn) {
> return;
> }
>
> + struct mac_binding_probe_data mac_binding_probe_data = {
> + .cache_data = mac_cache_data,
> + .sbrec_port_binding_by_name = sbrec_port_binding_by_name,
> + .chassis = chassis,
> + .swconn = statctrl_ctx.swconn,
> + };
> +
> void *node_data[STATS_MAX] = {
> - mac_cache_data,
> - mac_cache_data,
> - mac_cache_data
> + [STATS_MAC_BINDING] = mac_cache_data,
> + [STATS_FDB] = mac_cache_data,
> + [STATS_MAC_BINDING_PROBE] = &mac_binding_probe_data,
> };
>
> bool schedule_updated = false;
> @@ -197,9 +202,7 @@ statctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> uint64_t prev_delay = node->request_delay;
>
> stopwatch_start(node->name, time_msec());
> - node->run(statctrl_ctx.swconn,
> - sbrec_port_binding_by_name, &node->stats,
> - &node->request_delay, node_data[i]);
> + node->run(&node->stats, &node->request_delay, node_data[i]);
> vector_clear(&node->stats);
> if (vector_capacity(&node->stats) >=
> STATS_VEC_CAPACITY_THRESHOLD) {
> VLOG_DBG("The statistics vector for node '%s' capacity "
> diff --git a/controller/statctrl.h b/controller/statctrl.h
> index 69a0b6f35..3e56b4a54 100644
> --- a/controller/statctrl.h
> +++ b/controller/statctrl.h
> @@ -21,6 +21,7 @@
> void statctrl_init(void);
> void statctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> struct ovsdb_idl_index *sbrec_port_binding_by_name,
> + const struct sbrec_chassis *chassis,
> struct mac_cache_data *mac_cache_data);
>
> void statctrl_update_swconn(const char *target, int probe_interval);
> --
> 2.52.0
>
>
>
>
> Diese E Mail enthält möglicherweise vertrauliche Inhalte und ist nur für
> die Verwertung durch den vorgesehenen Empfänger bestimmt.
> Sollten Sie nicht der vorgesehene Empfänger sein, setzen Sie den Absender
> bitte unverzüglich in Kenntnis und löschen diese E Mail.
>
> Unsere Datenschutzhinweise finden Sie
> hier
> .
>
> This e-mail may contain confidential content and is intended only for the
> specified recipient/s.
> If you are not the intended recipient, please inform the sender
> immediately and delete this e-mail.
>
> Our privacy policy can be found here.
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Regards,
Ales
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev