On Tue, Jun 16, 2026 at 5:49 PM Xavier Simonart via dev <
[email protected]> wrote:
> Usually mac_binding_stats_run and mac_binding_probe_stats_run are executed
> either both with (since_updated < threshold->cooldown_period), and sb is
> not updated, or both with (since_updated >= threshold->cooldown_period),
> and sb is updated but ARP is not sent
> (until stats->idle_age_ms > threshold->value).
>
> However, if mac_binding_stats_run is executed as
> (since_updated < threshold->cooldown_period), it does not update
> mac_binding, as expected.
> Then, if mac_binding_probe_stats_run is executed just afterwards, but with
> (since_updated_ms >= threshold->cooldown_period), ARP is sent.
>
> Fix this by calling time_wall_msec() once in statctrl_run() and passing it
> to all stats_run functions.
>
> This was identified through random failures of test
> "MAC binding aging - probing GW router Dynamic Neigh".
>
> Fixes: 1e4d4409f391 ("controller: Send ARP/ND for stale mac_bindings
> entries.")
> Signed-off-by: Xavier Simonart <[email protected]>
> ---
> controller/mac-cache.c | 10 ++++------
> controller/mac-cache.h | 7 ++++---
> controller/statctrl.c | 7 +++++--
> 3 files changed, 13 insertions(+), 11 deletions(-)
>
> diff --git a/controller/mac-cache.c b/controller/mac-cache.c
> index c996fd6b9..58f42f2ea 100644
> --- a/controller/mac-cache.c
> +++ b/controller/mac-cache.c
> @@ -399,10 +399,9 @@ mac_binding_update_log(const char *action,
>
> void
> mac_binding_stats_run(struct vector *stats_vec, uint64_t *req_delay,
> - void *data)
> + void *data, long long timewall_now)
> {
> struct mac_cache_data *cache_data = data;
> - long long timewall_now = time_wall_msec();
>
> struct mac_cache_stats *stats;
> VECTOR_FOR_EACH_PTR (stats_vec, stats) {
> @@ -495,10 +494,10 @@ fdb_update_log(const char *action,
> }
>
> void
> -fdb_stats_run(struct vector *stats_vec, uint64_t *req_delay, void *data)
> +fdb_stats_run(struct vector *stats_vec, uint64_t *req_delay, void *data,
> + long long timewall_now)
> {
> struct mac_cache_data *cache_data = data;
> - long long timewall_now = time_wall_msec();
>
> struct mac_cache_stats *stats;
> VECTOR_FOR_EACH_PTR (stats_vec, stats) {
> @@ -877,9 +876,8 @@ mac_binding_probe_stats_process_flow_stats(
>
> void
> mac_binding_probe_stats_run(struct vector *stats_vec, uint64_t *req_delay,
> - void *data)
> + void *data, long long timewall_now)
> {
> - long long timewall_now = time_wall_msec();
> struct mac_binding_probe_data *probe_data = data;
> struct mac_cache_data *cache_data = probe_data->cache_data;
>
> diff --git a/controller/mac-cache.h b/controller/mac-cache.h
> index 365219d33..8abea60c7 100644
> --- a/controller/mac-cache.h
> +++ b/controller/mac-cache.h
> @@ -199,13 +199,14 @@ mac_binding_stats_process_flow_stats(struct vector
> *stats_vec,
> struct ofputil_flow_stats
> *ofp_stats);
>
> void mac_binding_stats_run(struct vector *stats_vec, uint64_t *req_delay,
> - void *data);
> + void *data, long long timewall_now);
>
> /* FDB stat processing. */
> void fdb_stats_process_flow_stats(struct vector *stats_vec,
> struct ofputil_flow_stats *ofp_stats);
>
> -void fdb_stats_run(struct vector *stats_vec, uint64_t *req_delay, void
> *data);
> +void fdb_stats_run(struct vector *stats_vec, uint64_t *req_delay, void
> *data,
> + long long timewall_now);
>
> /* Packet buffering. */
> void bp_packet_data_destroy(struct bp_packet_data *pd);
> @@ -234,6 +235,6 @@ void mac_binding_probe_stats_process_flow_stats(
> struct ofputil_flow_stats *ofp_stats);
>
> void mac_binding_probe_stats_run(struct vector *stats_vec, uint64_t
> *req_delay,
> - void *data);
> + void *data, long long timewall_now);
>
> #endif /* controller/mac-cache.h */
> diff --git a/controller/statctrl.c b/controller/statctrl.c
> index f9f0a30f1..00c0a4450 100644
> --- a/controller/statctrl.c
> +++ b/controller/statctrl.c
> @@ -64,7 +64,8 @@ struct stats_node {
> struct ofputil_flow_stats *ofp_stats);
> /* Function to process the parsed stats.
> * This function runs in main thread locked behind mutex. */
> - void (*run)(struct vector *stats, uint64_t *req_delay, void *data);
> + void (*run)(struct vector *stats, uint64_t *req_delay, void *data,
> + long long timewall_now);
> /* Name of the stats node. */
> const char *name;
> };
> @@ -194,6 +195,7 @@ statctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
>
> bool schedule_updated = false;
> long long now = time_msec();
> + long long timewall_now = time_wall_msec();
>
> ovs_mutex_lock(&mutex);
> statctrl_ctx.new_main_seq = seq_read(statctrl_ctx.main_seq);
> @@ -202,7 +204,8 @@ statctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> uint64_t prev_delay = node->request_delay;
>
> stopwatch_start(node->name, time_msec());
> - node->run(&node->stats, &node->request_delay, node_data[i]);
> + node->run(&node->stats, &node->request_delay, node_data[i],
> + timewall_now);
> vector_clear(&node->stats);
> if (vector_capacity(&node->stats) >=
> STATS_VEC_CAPACITY_THRESHOLD) {
> VLOG_DBG("The statistics vector for node '%s' capacity "
> --
> 2.47.1
>
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
>
Hi Xavier,
this looks like a pretty rare and unfortunate race, however, it's still the
right thing to do.
Acked-by: Ales Musil <[email protected]>
Regards,
Ales
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev