On Tue, Jun 16, 2026 at 5:49 PM Xavier Simonart via dev <
[email protected]> wrote:

> Usually mac_binding_stats_run and mac_binding_probe_stats_run are executed
> either both with (since_updated < threshold->cooldown_period), and sb is
> not updated, or both with (since_updated >= threshold->cooldown_period),
> and sb is updated but ARP is not sent
> (until stats->idle_age_ms > threshold->value).
>
> However, if mac_binding_stats_run is executed as
> (since_updated < threshold->cooldown_period), it does not update
> mac_binding, as expected.
> Then, if mac_binding_probe_stats_run is executed just afterwards, but with
> (since_updated_ms >= threshold->cooldown_period), ARP is sent.
>
> Fix this by calling time_wall_msec() once in statctrl_run() and passing it
> to all stats_run functions.
>
> This was identified through random failures of test
> "MAC binding aging - probing GW router Dynamic Neigh".
>
> Fixes: 1e4d4409f391 ("controller: Send ARP/ND for stale mac_bindings
> entries.")
> Signed-off-by: Xavier Simonart <[email protected]>
> ---
>  controller/mac-cache.c | 10 ++++------
>  controller/mac-cache.h |  7 ++++---
>  controller/statctrl.c  |  7 +++++--
>  3 files changed, 13 insertions(+), 11 deletions(-)
>
> diff --git a/controller/mac-cache.c b/controller/mac-cache.c
> index c996fd6b9..58f42f2ea 100644
> --- a/controller/mac-cache.c
> +++ b/controller/mac-cache.c
> @@ -399,10 +399,9 @@ mac_binding_update_log(const char *action,
>
>  void
>  mac_binding_stats_run(struct vector *stats_vec, uint64_t *req_delay,
> -                      void *data)
> +                      void *data, long long timewall_now)
>  {
>      struct mac_cache_data *cache_data = data;
> -    long long timewall_now = time_wall_msec();
>
>      struct mac_cache_stats *stats;
>      VECTOR_FOR_EACH_PTR (stats_vec, stats) {
> @@ -495,10 +494,10 @@ fdb_update_log(const char *action,
>  }
>
>  void
> -fdb_stats_run(struct vector *stats_vec, uint64_t *req_delay, void *data)
> +fdb_stats_run(struct vector *stats_vec, uint64_t *req_delay, void *data,
> +              long long timewall_now)
>  {
>      struct mac_cache_data *cache_data = data;
> -    long long timewall_now = time_wall_msec();
>
>      struct mac_cache_stats *stats;
>      VECTOR_FOR_EACH_PTR (stats_vec, stats) {
> @@ -877,9 +876,8 @@ mac_binding_probe_stats_process_flow_stats(
>
>  void
>  mac_binding_probe_stats_run(struct vector *stats_vec, uint64_t *req_delay,
> -                            void *data)
> +                            void *data, long long timewall_now)
>  {
> -    long long timewall_now = time_wall_msec();
>      struct mac_binding_probe_data *probe_data = data;
>      struct mac_cache_data *cache_data = probe_data->cache_data;
>
> diff --git a/controller/mac-cache.h b/controller/mac-cache.h
> index 365219d33..8abea60c7 100644
> --- a/controller/mac-cache.h
> +++ b/controller/mac-cache.h
> @@ -199,13 +199,14 @@ mac_binding_stats_process_flow_stats(struct vector
> *stats_vec,
>                                       struct ofputil_flow_stats
> *ofp_stats);
>
>  void mac_binding_stats_run(struct vector *stats_vec, uint64_t *req_delay,
> -                           void *data);
> +                           void *data, long long timewall_now);
>
>  /* FDB stat processing. */
>  void fdb_stats_process_flow_stats(struct vector *stats_vec,
>                                    struct ofputil_flow_stats *ofp_stats);
>
> -void fdb_stats_run(struct vector *stats_vec, uint64_t *req_delay, void
> *data);
> +void fdb_stats_run(struct vector *stats_vec, uint64_t *req_delay, void
> *data,
> +                   long long timewall_now);
>
>  /* Packet buffering. */
>  void bp_packet_data_destroy(struct bp_packet_data *pd);
> @@ -234,6 +235,6 @@ void mac_binding_probe_stats_process_flow_stats(
>          struct ofputil_flow_stats *ofp_stats);
>
>  void mac_binding_probe_stats_run(struct vector *stats_vec, uint64_t
> *req_delay,
> -                                 void *data);
> +                                 void *data, long long timewall_now);
>
>  #endif /* controller/mac-cache.h */
> diff --git a/controller/statctrl.c b/controller/statctrl.c
> index f9f0a30f1..00c0a4450 100644
> --- a/controller/statctrl.c
> +++ b/controller/statctrl.c
> @@ -64,7 +64,8 @@ struct stats_node {
>                                 struct ofputil_flow_stats *ofp_stats);
>      /* Function to process the parsed stats.
>       * This function runs in main thread locked behind mutex. */
> -    void (*run)(struct vector *stats, uint64_t *req_delay, void *data);
> +    void (*run)(struct vector *stats, uint64_t *req_delay, void *data,
> +                long long timewall_now);
>      /* Name of the stats node. */
>      const char *name;
>  };
> @@ -194,6 +195,7 @@ statctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
>
>      bool schedule_updated = false;
>      long long now = time_msec();
> +    long long timewall_now = time_wall_msec();
>
>      ovs_mutex_lock(&mutex);
>      statctrl_ctx.new_main_seq = seq_read(statctrl_ctx.main_seq);
> @@ -202,7 +204,8 @@ statctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
>          uint64_t prev_delay = node->request_delay;
>
>          stopwatch_start(node->name, time_msec());
> -        node->run(&node->stats, &node->request_delay, node_data[i]);
> +        node->run(&node->stats, &node->request_delay, node_data[i],
> +                  timewall_now);
>          vector_clear(&node->stats);
>          if (vector_capacity(&node->stats) >=
> STATS_VEC_CAPACITY_THRESHOLD) {
>              VLOG_DBG("The statistics vector for node '%s' capacity "
> --
> 2.47.1
>
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
>
Hi Xavier,

this looks like a pretty rare and unfortunate race, however, it's still the
right thing to do.

Acked-by: Ales Musil <[email protected]>

Regards,
Ales
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to