On Tue, Jun 9, 2026 at 12:47 PM Ales Musil <[email protected]> wrote:

> The put_ARP/ND/FDB were bound to pinctrl lock to prevent data races
> as they have used shared hmap to store the data that were later on
> added into SB. This is problematic in situations when main thread
> holds the lock and we have to process any put_ARP/ND/FDB packet. That
> contention would block the pinctrl thread from processing the
> incoming packets. The same can happen from the opposite side, pinctrl
> could be holding a lock, delaying main thread from processing other
> updates.
>
> To prevent that use the hmap only by main thread, this will ensure
> that we wait properly for the multicast timeout to expire and at the
> same time to do the deduplication. Once the entry arrives it will
> be shared by thread-safe ring buffer and consumed by main thread.
>
> Reported-at: https://redhat.atlassian.net/browse/FDP-3924
> Signed-off-by: Ales Musil <[email protected]>
> ---
>  controller/pinctrl.c | 168 +++++++++++++++++++++++++------------------
>  1 file changed, 98 insertions(+), 70 deletions(-)
>
> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> index bb8d20e7f..cb925d96a 100644
> --- a/controller/pinctrl.c
> +++ b/controller/pinctrl.c
> @@ -65,6 +65,7 @@
>  #include "ovn-sb-idl.h"
>  #include "ovn-dns.h"
>  #include "garp_rarp.h"
> +#include "spsc-ring.h"
>
>  VLOG_DEFINE_THIS_MODULE(pinctrl);
>
> @@ -83,24 +84,31 @@ VLOG_DEFINE_THIS_MODULE(pinctrl);
>   * and pinctrl_run().
>   *
>   *
> - *   - put_arp/put_nd - These actions stores the IPv4/IPv6 and MAC
> addresses
> + *   - put_arp/put_nd - These actions store the IPv4/IPv6 and MAC
> addresses
>   *                      in the 'MAC_Binding' table.
>   *                      The function 'pinctrl_handle_put_mac_binding()'
> (which
> - *                      is called with in the pinctrl_handler thread),
> stores
> - *                      the IPv4/IPv6 and MAC addresses in the
> - *                      hmap - put_mac_bindings.
> + *                      is called within the pinctrl_handler thread),
> pushes
> + *                      the IPv4/IPv6 and MAC addresses directly into a
> + *                      lock-free SPSC ring buffer ('mac_bindings_ring').
> + *                      This does not require pinctrl_mutex.  For
> multicast
> + *                      replies, a special cookie value
> + *                      (MAC_BINDINGS_MC_COOKIE) is set to signal the main
> + *                      thread to apply a random delay.
>   *
> - *                      pinctrl_run(), reads these mac bindings from the
> hmap
> - *                      'put_mac_bindings' and writes to the 'MAC_Binding'
> - *                      table in the Southbound DB.
> + *                      pinctrl_run() drains the ring in
> + *                      run_put_mac_bindings(), inserts entries into the
> + *                      main-thread-only hmap 'put_mac_bindings' (applying
> + *                      a random delay for multicast replies to avoid
> + *                      thundering herd), and writes expired entries to
> the
> + *                      'MAC_Binding' table in the Southbound DB.
>   *
>   *   - arp/nd_ns      - These actions generate an ARP/IPv6 Neighbor
> solicit
>   *                      requests. The original packets are buffered and
>   *                      injected back when put_arp/put_nd resolves
>   *                      corresponding ARP/IPv6 Neighbor solicit requests.
> - *                      When pinctrl_run(), writes the mac bindings from
> the
> - *                      'put_mac_bindings' hmap to the MAC_Binding table
> in
> - *                      SB DB, run_buffered_binding will add the buffered
> + *                      When pinctrl_run() writes the mac bindings from
> the
> + *                      ring buffer to the MAC_Binding table in SB DB,
> + *                      run_buffered_binding will add the buffered
>   *                      packets to buffered_mac_bindings and notify
>   *                      pinctrl_handler.
>   *
> @@ -197,16 +205,14 @@ run_buffered_binding(const struct
> sbrec_mac_binding_table *mac_binding_table,
>
>  static void pinctrl_handle_put_mac_binding(const struct flow *md,
>                                             const struct flow *headers,
> -                                           bool is_arp)
> -    OVS_REQUIRES(pinctrl_mutex);
> -static void init_put_mac_bindings(void);
> -static void destroy_put_mac_bindings(void);
> +                                           bool is_arp);
> +static void init_mac_bindings(void);
> +static void destroy_mac_bindings(void);
>  static void run_put_mac_bindings(
>      struct ovsdb_idl_txn *ovnsb_idl_txn,
>      struct ovsdb_idl_index *sbrec_datapath_binding_by_key,
>      struct ovsdb_idl_index *sbrec_port_binding_by_key,
> -    struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip)
> -    OVS_REQUIRES(pinctrl_mutex);
> +    struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip);
>  static void wait_put_mac_bindings(void);
>  static void send_mac_binding_buffered_pkts(struct rconn *swconn);
>
> @@ -366,18 +372,15 @@ static void run_put_fdb(struct ovsdb_idl_txn
> *ovnsb_idl_txn,
>                          struct ovsdb_idl_index *sbrec_fdb_by_dp_key_mac,
>              struct ovsdb_idl_index *sbrec_port_binding_by_key,
>              struct ovsdb_idl_index *sbrec_datapath_binding_by_key,
> -                        struct fdb *fdb, uint64_t cur_cfg)
> -                        OVS_REQUIRES(pinctrl_mutex);
> +                        struct fdb *fdb, uint64_t cur_cfg);
>  static void run_put_fdbs(struct ovsdb_idl_txn *ovnsb_idl_txn,
>              struct ovsdb_idl_index *sbrec_port_binding_by_key,
>              struct ovsdb_idl_index *sbrec_datapath_binding_by_key,
>                          struct ovsdb_idl_index *sbrec_fdb_by_dp_key_mac,
> -                        uint64_t cur_cfg)
> -                        OVS_REQUIRES(pinctrl_mutex);
> +                        uint64_t cur_cfg);
>  static void wait_put_fdbs(void);
>  static void pinctrl_handle_put_fdb(const struct flow *md,
> -                                   const struct flow *headers)
> -                                   OVS_REQUIRES(pinctrl_mutex);
> +                                   const struct flow *headers);
>
>  static void set_from_ctrl_flag_in_pkt_metadata(struct ofputil_packet_in
> *);
>
> @@ -556,7 +559,7 @@ out:
>  void
>  pinctrl_init(void)
>  {
> -    init_put_mac_bindings();
> +    init_mac_bindings();
>      init_send_arps_nds();
>      init_ipv6_ras();
>      init_ipv6_prefixd();
> @@ -3776,10 +3779,8 @@ process_packet_in(struct rconn *swconn, const
> struct ofp_header *msg)
>          break;
>
>      case ACTION_OPCODE_PUT_ARP:
> -        ovs_mutex_lock(&pinctrl_mutex);
>          pinctrl_handle_put_mac_binding(&pin.flow_metadata.flow, &headers,
>                                         true);
> -        ovs_mutex_unlock(&pinctrl_mutex);
>          break;
>
>      case ACTION_OPCODE_DHCP_RELAY_REQ_CHK:
> @@ -3808,16 +3809,12 @@ process_packet_in(struct rconn *swconn, const
> struct ofp_header *msg)
>          break;
>
>      case ACTION_OPCODE_PUT_ND:
> -        ovs_mutex_lock(&pinctrl_mutex);
>          pinctrl_handle_put_mac_binding(&pin.flow_metadata.flow, &headers,
>                                         false);
> -        ovs_mutex_unlock(&pinctrl_mutex);
>          break;
>
>      case ACTION_OPCODE_PUT_FDB:
> -        ovs_mutex_lock(&pinctrl_mutex);
>          pinctrl_handle_put_fdb(&pin.flow_metadata.flow, &headers);
> -        ovs_mutex_unlock(&pinctrl_mutex);
>          break;
>
>      case ACTION_OPCODE_PUT_DHCPV6_OPTS:
> @@ -4757,7 +4754,7 @@ pinctrl_destroy(void)
>      destroy_buffered_packets_map();
>      destroy_activated_ports();
>      event_table_destroy();
> -    destroy_put_mac_bindings();
> +    destroy_mac_bindings();
>      destroy_put_vport_bindings();
>      ip_mcast_snoop_destroy();
>      destroy_svc_monitors();
> @@ -4782,22 +4779,30 @@ pinctrl_destroy(void)
>
>  #define MAX_MAC_BINDING_DELAY_MSEC 50
>  #define MAX_FDB_DELAY_MSEC         50
> -#define MAX_MAC_BINDINGS           1000
> +#define MAX_MAC_BINDINGS           1024
> +#define MAC_BINDINGS_MC_COOKIE     0xff
> +
>
> -/* Contains "struct mac_binding"s. */
> +/* Contains "struct mac_binding"s. This is used by main thread only. */
>  static struct hmap put_mac_bindings;
> +/* Contains "struct mac_binding_data". This is populated by pinctrl thread
> + * and consumed by main thread. */
> +static struct spsc_ring mac_bindings_ring;
>
>  static void
> -init_put_mac_bindings(void)
> +init_mac_bindings(void)
>  {
>      hmap_init(&put_mac_bindings);
> +    spsc_ring_init(&mac_bindings_ring, MAX_MAC_BINDINGS,
> +                   sizeof (struct mac_binding_data));
>  }
>
>  static void
> -destroy_put_mac_bindings(void)
> +destroy_mac_bindings(void)
>  {
>      mac_bindings_clear(&put_mac_bindings);
>      hmap_destroy(&put_mac_bindings);
> +    spsc_ring_destroy(&mac_bindings_ring);
>  }
>
>  /* Called with in the pinctrl_handler thread context. */
> @@ -4805,14 +4810,11 @@ static void
>  pinctrl_handle_put_mac_binding(const struct flow *md,
>                                 const struct flow *headers,
>                                 bool is_arp)
> -    OVS_REQUIRES(pinctrl_mutex)
>  {
> -    if (hmap_count(&put_mac_bindings) >= MAX_MAC_BINDINGS) {
> -        COVERAGE_INC(pinctrl_drop_put_mac_binding);
> -        return;
> -    }
> -
>      struct mac_binding_data mb_data = (struct mac_binding_data) {
> +            .cookie = eth_addr_is_multicast(headers->dl_dst)
> +                      ? MAC_BINDINGS_MC_COOKIE
> +                      : 0,
>              .dp_key =  ntohll(md->metadata),
>              .port_key =  md->regs[MFF_LOG_INPORT - MFF_REG0],
>              .mac = headers->dl_src,
> @@ -4825,18 +4827,13 @@ pinctrl_handle_put_mac_binding(const struct flow
> *md,
>          memcpy(&mb_data.ip, &ip6, sizeof mb_data.ip);
>      }
>
> -    /* If the ARP reply was unicast we should not delay it,
> -     * there won't be any race. */
> -    uint32_t delay = eth_addr_is_multicast(headers->dl_dst)
> -                     ? random_range(MAX_MAC_BINDING_DELAY_MSEC) + 1
> -                     : 0;
> -    long long timestamp = time_msec() + delay;
> -    mac_binding_add(&put_mac_bindings, mb_data, NULL, timestamp);
> -
> -    /* We can send the buffered packet once the main ovn-controller
> -     * thread calls pinctrl_run() and it writes the mac_bindings stored
> -     * in 'put_mac_bindings' hmap into the Southbound MAC_Binding table.
> */
> -    notify_pinctrl_main();
> +    if (!spsc_ring_push(&mac_bindings_ring, &mb_data)) {
> +        /* This shouldn't happen, the main thread drains the ring buffer
> +         * every iteration. */
> +        COVERAGE_INC(pinctrl_drop_put_mac_binding);
> +    } else {
> +        notify_pinctrl_main();
> +    }
>  }
>
>  /* Called with in the pinctrl_handler thread context. */
> @@ -4898,14 +4895,32 @@ run_put_mac_bindings(struct ovsdb_idl_txn
> *ovnsb_idl_txn,
>                       struct ovsdb_idl_index
> *sbrec_datapath_binding_by_key,
>                       struct ovsdb_idl_index *sbrec_port_binding_by_key,
>                       struct ovsdb_idl_index
> *sbrec_mac_binding_by_lport_ip)
> -    OVS_REQUIRES(pinctrl_mutex)
>  {
> +    long long now = time_msec();
> +
> +    struct mac_binding_data mb_data;
> +    SPSC_RING_FOR_EACH_POP (&mac_bindings_ring, mb_data) {
> +        if (hmap_count(&put_mac_bindings) >= MAX_MAC_BINDINGS) {
> +            COVERAGE_INC(pinctrl_drop_put_mac_binding);
> +            continue;
> +        }
> +
> +        /* If the ARP reply was unicast we should not delay it,
> +         * there won't be any race. */
> +        uint32_t delay = mb_data.cookie == MAC_BINDINGS_MC_COOKIE
> +                         ? random_range(MAX_MAC_BINDING_DELAY_MSEC) + 1
> +                         : 0;
> +        /* Clear the cookie, it's needed just to know if it was delayed or
> +         * not. */
> +        mb_data.cookie = 0;
> +
> +        mac_binding_add(&put_mac_bindings, mb_data, NULL, now + delay);
> +    }
> +
>      if (!ovnsb_idl_txn) {
>          return;
>      }
>
> -    long long now = time_msec();
> -
>      struct mac_binding *mb;
>      HMAP_FOR_EACH_SAFE (mb, hmap_node, &put_mac_bindings) {
>          if (now >= mb->timestamp) {
> @@ -4986,7 +5001,6 @@ run_buffered_binding(const struct
> sbrec_mac_binding_table *mac_binding_table,
>
>  static void
>  wait_put_mac_bindings(void)
> -    OVS_REQUIRES(pinctrl_mutex)
>  {
>      struct mac_binding *mb;
>      HMAP_FOR_EACH (mb, hmap_node, &put_mac_bindings) {
> @@ -8878,9 +8892,13 @@ pinctrl_split_buf_action_handler(struct rconn
> *swconn, struct dp_packet *pkt,
>      ofpbuf_uninit(&ofpacts);
>  }
>
> -#define MAX_FDB_ENTRIES             1000
> +#define MAX_FDB_ENTRIES             1024
>
> +/* Contains "struct fdb"s. This is used by main thread only. */
>  static struct hmap put_fdbs;
> +/* Contains "struct fdb_data". This is populated by pinctrl thread
> + * and consumed by main thread. */
> +static struct spsc_ring fdbs_ring;
>
>  /* MAC learning (fdb) related functions.  Runs within the main
>   * ovn-controller thread context. */
> @@ -8889,6 +8907,8 @@ static void
>  init_fdb_entries(void)
>  {
>      hmap_init(&put_fdbs);
> +    spsc_ring_init(&fdbs_ring, MAX_FDB_ENTRIES,
> +                   sizeof (struct fdb_data));
>  }
>
>  static void
> @@ -8896,6 +8916,7 @@ destroy_fdb_entries(void)
>  {
>      fdbs_clear(&put_fdbs);
>      hmap_destroy(&put_fdbs);
> +    spsc_ring_destroy(&fdbs_ring);
>  }
>
>  static const struct sbrec_fdb *
> @@ -8975,13 +8996,24 @@ run_put_fdbs(struct ovsdb_idl_txn *ovnsb_idl_txn,
>               struct ovsdb_idl_index *sbrec_port_binding_by_key,
>               struct ovsdb_idl_index *sbrec_datapath_binding_by_key,
>               struct ovsdb_idl_index *sbrec_fdb_by_dp_key_mac, uint64_t
> cur_cfg)
> -             OVS_REQUIRES(pinctrl_mutex)
>  {
> +    long long now = time_msec();
> +
> +    struct fdb_data fdb_data;
> +    SPSC_RING_FOR_EACH_POP (&fdbs_ring, fdb_data) {
> +        if (hmap_count(&put_fdbs) >= MAX_FDB_ENTRIES) {
> +            COVERAGE_INC(pinctrl_drop_put_fdb);
> +            continue;
> +        }
> +
> +        uint32_t delay = random_range(MAX_FDB_DELAY_MSEC) + 1;
> +        fdb_add(&put_fdbs, fdb_data, now + delay);
> +    }
> +
>      if (!ovnsb_idl_txn) {
>          return;
>      }
>
> -    long long now = time_msec();
>      struct fdb *fdb;
>      HMAP_FOR_EACH_SAFE (fdb, hmap_node, &put_fdbs) {
>          if (fdb->cfg >= 0 && pinctrl_is_sb_commited(fdb->cfg, cur_cfg)) {
> @@ -8997,7 +9029,6 @@ run_put_fdbs(struct ovsdb_idl_txn *ovnsb_idl_txn,
>
>  static void
>  wait_put_fdbs(void)
> -    OVS_REQUIRES(pinctrl_mutex)
>  {
>      struct fdb *fdb;
>      HMAP_FOR_EACH (fdb, hmap_node, &put_fdbs) {
> @@ -9008,23 +9039,20 @@ wait_put_fdbs(void)
>  /* Called with in the pinctrl_handler thread context. */
>  static void
>  pinctrl_handle_put_fdb(const struct flow *md, const struct flow *headers)
> -                       OVS_REQUIRES(pinctrl_mutex)
>  {
> -    if (hmap_count(&put_fdbs) >= MAX_FDB_ENTRIES) {
> -        COVERAGE_INC(pinctrl_drop_put_fdb);
> -        return;
> -    }
> -
>      struct fdb_data fdb_data = (struct fdb_data) {
>              .dp_key =  ntohll(md->metadata),
>              .port_key =  md->regs[MFF_LOG_INPORT - MFF_REG0],
>              .mac = headers->dl_src,
>      };
>
> -    uint32_t delay = random_range(MAX_FDB_DELAY_MSEC) + 1;
> -    long long timestamp = time_msec() + delay;
> -    fdb_add(&put_fdbs, fdb_data, timestamp);
> -    notify_pinctrl_main();
> +    if (!spsc_ring_push(&fdbs_ring, &fdb_data)) {
> +        /* This shouldn't happen, the main thread drains the ring buffer
> +         * every iteration. */
> +        COVERAGE_INC(pinctrl_drop_put_fdb);
> +    } else {
> +        notify_pinctrl_main();
> +    }
>  }
>
>  /* This function sets the register bit 'MLF_FROM_CTRL_BIT'
> --
> 2.54.0
>
>
There are two mistakes in the current version,
both run_put_mac_bindings and run_put_fdbs
remained behind the mutex in pinctrl_run, I'll send
v2 with that fixed, sorry about the noise.
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to