On Thu, May 28, 2026 at 6:02 AM Dumitru Ceara <[email protected]> wrote: > > Hi all, > > On 5/28/26 11:55 AM, Lorenzo Bianconi wrote: > >> On Wed, May 27, 2026 at 7:06 PM Lorenzo Bianconi < > >> [email protected]> wrote: > >> > >>>> The packet buffering, which is part of the ARP processing, required > >>>> a pinctrl lock. As a consequence we could block packet processing > >>>> when the lock is taken by main thread and possibly drop some traffic > >>>> that is heading towards pinctrl. Make sure the packet buffering is > >>>> now handled without any lock, that is done by using cmap instead of > >>>> hmap to store the objects and all concurrent members are accessed > >>>> via atomics instead. This should increase the throughput of the > >>>> packet-in processing for ARP handling. > >>> > >>> Hi Ales, > >>> > >>> I think the patch is fine. Moreover, reading the comments from Marc, I > >>> consider > >>> this one the first step toward reducing the contention in pinctrl_thread > >>> codepath. > >>> > >>> Acked-by: Lorenzo Bianconi <[email protected]> > >>> > >>> Regards, > >>> Lorenzo > >> > >> > >>>> > >>>> Reported-at: https://redhat.atlassian.net/browse/FDP-3301 > >>>> Signed-off-by: Ales Musil <[email protected]> > >>>> --- > >>>> controller/mac-cache.c | 127 +++++++++++++++++++++++------------------ > >>>> controller/mac-cache.h | 48 +++++++--------- > >>>> controller/pinctrl.c | 81 +++++++++++--------------- > >>>> 3 files changed, 125 insertions(+), 131 deletions(-) > >>>> > >>> > >>> [...] > >>> > >>>> @@ -115,25 +115,25 @@ struct bp_packet_data { > >>>> }; > >>>> > >>>> struct buffered_packets { > >>>> - struct hmap_node hmap_node; > >>>> + struct cmap_node cmap_node; > >>>> > >>>> - struct mac_binding_data mb_data; > >>>> + struct mac_binding_data mb_data; /* Immutable after insert. */ > >>> > >>> nit: in order to make it really immutable we can do something like: > >>> > >>> const struct mac_binding_data * const mb_data; > >>> > >> > >> That would require extra allocation because it would become > >> a pointer instead of embedded struct. Which is probably not worth > >> Also, making it const struct is a bit clunky during initialization. > > > > ack, I agree it is uglier, but it would be safer. Anyway, just a nit, up to > > you. > > > >> > >> > >>>> > >>>> - /* Queue of packet_data associated with this struct. */ > >>>> + /* Queue of packet_data associated with this struct. > >>>> + * Handler thread only. */ > >>>> struct vector queue; > >>> > >>> nit: can we make vector (and the fields below) really accessible just by > >>> pinctrl > >>> thread? > >>> > >> > >> I was thinking about that, but it didn't seem worth splitting into > >> two structs just because of this. > > > > even here, just a nit, up to you. > > > > Regards, > > Lorenzo > > > >> > >> > >>>> > >>>> - /* Timestamp in ms when the buffered packet should expire. */ > >>>> + /* Timestamp in ms when the buffered packet should expire. > >>>> + * Handler thread only. */ > >>>> long long int expire_at_ms; > >>>> > >>>> - /* Timestamp in ms when the buffered packet should do full SB > >>> lookup.*/ > >>>> - long long int lookup_at_ms; > >>>> -}; > >>>> + /* Resolved MAC address packed as uint64. 0 means unresolved. > >>>> + * Written by main thread, read by handler thread. */ > >>>> + atomic_uint64_t resolved_mac; > >>>> > >>>> -struct buffered_packets_ctx { > >>>> - /* Map of all buffered packets waiting for the MAC address. */ > >>>> - struct hmap buffered_packets; > >>>> - /* List of packet data that are ready to be sent. */ > >>>> - struct vector ready_packets_data; > >>>> + /* Timestamp in ms when the buffered packet should do full SB > >>> lookup. > >>>> + * Main thread only. */ > >>>> + long long int lookup_at_ms; > >>>> }; > >>>> > >>>> /* Thresholds. */ > >>>> @@ -211,27 +211,23 @@ void fdb_stats_run(struct vector *stats_vec, > >>> uint64_t *req_delay, void *data); > >>>> void bp_packet_data_destroy(struct bp_packet_data *pd); > >>>> > >>>> struct buffered_packets * > >>>> -buffered_packets_add(struct buffered_packets_ctx *ctx, > >>>> +buffered_packets_add(struct cmap *bp_map, > >>>> struct mac_binding_data mb_data); > >>>> > >>>> void buffered_packets_packet_data_enqueue(struct buffered_packets *bp, > >>>> const struct > >>> ofputil_packet_in *pin, > >>>> const struct ofpbuf > >>> *continuation); > >>>> > >>>> -void buffered_packets_ctx_run(struct buffered_packets_ctx *ctx, > >>>> - const struct hmap *recent_mbs, > >>>> - struct ovsdb_idl_index *sbrec_pb_by_key, > >>>> - struct ovsdb_idl_index *sbrec_dp_by_key, > >>>> - struct ovsdb_idl_index *sbrec_pb_by_name, > >>>> - struct ovsdb_idl_index > >>> *sbrec_mb_by_lport_ip); > >>>> - > >>>> -void buffered_packets_ctx_init(struct buffered_packets_ctx *ctx); > >>>> - > >>>> -void buffered_packets_ctx_destroy(struct buffered_packets_ctx *ctx); > >>>> +bool buffered_packets_lookup_run(struct cmap *bp_map, > >>>> + const struct hmap *recent_mbs, > >>>> + struct ovsdb_idl_index > >>> *sbrec_pb_by_key, > >>>> + struct ovsdb_idl_index > >>> *sbrec_dp_by_key, > >>>> + struct ovsdb_idl_index > >>> *sbrec_pb_by_name, > >>>> + struct ovsdb_idl_index > >>> *sbrec_mb_by_lport_ip); > >>>> > >>>> -bool buffered_packets_ctx_is_ready_to_send(struct buffered_packets_ctx > >>> *ctx); > >>>> +void buffered_packets_run(struct cmap *bp_map, struct vector *rpd); > >>>> > >>>> -bool buffered_packets_ctx_has_packets(struct buffered_packets_ctx *ctx); > >>>> +void buffered_packets_map_destroy(struct cmap *bp_map); > >>>> > >>>> void mac_binding_probe_stats_process_flow_stats( > >>>> struct vector *stats_vec, > >>>> diff --git a/controller/pinctrl.c b/controller/pinctrl.c > >>>> index 4473ec668..bb8d20e7f 100644 > >>>> --- a/controller/pinctrl.c > >>>> +++ b/controller/pinctrl.c > >>>> @@ -185,16 +185,15 @@ struct pinctrl { > >>>> static struct pinctrl pinctrl; > >>>> > >>>> static bool pinctrl_is_sb_commited(int64_t commit_cfg, int64_t cur_cfg); > >>>> -static void init_buffered_packets_ctx(void); > >>>> -static void destroy_buffered_packets_ctx(void); > >>>> +static void init_buffered_packets_map(void); > >>>> +static void destroy_buffered_packets_map(void); > >>>> static void > >>>> run_buffered_binding(const struct sbrec_mac_binding_table > >>> *mac_binding_table, > >>>> const struct hmap *local_datapaths, > >>>> struct ovsdb_idl_index *sbrec_port_binding_by_key, > >>>> struct ovsdb_idl_index > >>> *sbrec_datapath_binding_by_key, > >>>> struct ovsdb_idl_index *sbrec_port_binding_by_name, > >>>> - struct ovsdb_idl_index > >>> *sbrec_mac_binding_by_lport_ip) > >>>> - OVS_REQUIRES(pinctrl_mutex); > >>>> + struct ovsdb_idl_index > >>> *sbrec_mac_binding_by_lport_ip); > >>>> > >>>> static void pinctrl_handle_put_mac_binding(const struct flow *md, > >>>> const struct flow *headers, > >>>> @@ -209,8 +208,7 @@ static void run_put_mac_bindings( > >>>> struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip) > >>>> OVS_REQUIRES(pinctrl_mutex); > >>>> static void wait_put_mac_bindings(void); > >>>> -static void send_mac_binding_buffered_pkts(struct rconn *swconn) > >>>> - OVS_REQUIRES(pinctrl_mutex); > >>>> +static void send_mac_binding_buffered_pkts(struct rconn *swconn); > >>>> > >>>> static void pinctrl_activation_strategy_handler(const struct match *md); > >>>> > >>>> @@ -562,7 +560,7 @@ pinctrl_init(void) > >>>> init_send_arps_nds(); > >>>> init_ipv6_ras(); > >>>> init_ipv6_prefixd(); > >>>> - init_buffered_packets_ctx(); > >>>> + init_buffered_packets_map(); > >>>> init_activated_ports(); > >>>> init_event_table(); > >>>> ip_mcast_snoop_init(); > >>>> @@ -1577,18 +1575,18 @@ prepare_ipv6_prefixd(struct ovsdb_idl_txn > >>> *ovnsb_idl_txn, > >>>> } > >>>> } > >>>> > >>>> -static struct buffered_packets_ctx buffered_packets_ctx; > >>>> +static struct cmap buffered_packets_map; > >>>> > >>>> static void > >>>> -init_buffered_packets_ctx(void) > >>>> +init_buffered_packets_map(void) > >>>> { > >>>> - buffered_packets_ctx_init(&buffered_packets_ctx); > >>>> + cmap_init(&buffered_packets_map); > >>>> } > >>>> > >>>> static void > >>>> -destroy_buffered_packets_ctx(void) > >>>> +destroy_buffered_packets_map(void) > >>>> { > >>>> - buffered_packets_ctx_destroy(&buffered_packets_ctx); > >>>> + buffered_packets_map_destroy(&buffered_packets_map); > >>>> } > >>>> > >>>> /* Called with in the pinctrl_handler thread context. */ > >>>> @@ -1596,7 +1594,6 @@ static void > >>>> pinctrl_handle_buffered_packets(const struct ofputil_packet_in *pin, > >>>> const struct ofpbuf *continuation, > >>>> bool is_arp) > >>>> -OVS_REQUIRES(pinctrl_mutex) > >>>> { > >>>> const struct match *md = &pin->flow_metadata; > >>>> struct mac_binding_data mb_data; > >>>> @@ -1613,7 +1610,7 @@ OVS_REQUIRES(pinctrl_mutex) > >>>> md->flow.regs[MFF_LOG_OUTPORT - MFF_REG0], > >>>> ip, eth_addr_zero); > >>>> > >>>> - struct buffered_packets *bp = > >>> buffered_packets_add(&buffered_packets_ctx, > >>>> + struct buffered_packets *bp = > >>> buffered_packets_add(&buffered_packets_map, > >>>> mb_data); > >>>> if (!bp) { > >>>> COVERAGE_INC(pinctrl_drop_buffered_packets_map); > >>>> @@ -1643,9 +1640,7 @@ pinctrl_handle_arp(struct rconn *swconn, const > >>> struct flow *ip_flow, > >>>> return; > >>>> } > >>>> > >>>> - ovs_mutex_lock(&pinctrl_mutex); > >>>> pinctrl_handle_buffered_packets(pin, continuation, true); > >>>> - ovs_mutex_unlock(&pinctrl_mutex); > >>>> > >>>> /* Compose an ARP packet. */ > >>>> uint64_t packet_stub[128 / 8]; > >>>> @@ -4081,12 +4076,12 @@ pinctrl_handler(void *arg_) > >>>> send_arp_nd_run(swconn, &send_arp_nd_time); > >>>> send_ipv6_ras(swconn, &send_ipv6_ra_time); > >>>> send_ipv6_prefixd(swconn, &send_prefixd_time); > >>>> - send_mac_binding_buffered_pkts(swconn); > >>>> bfd_monitor_send_msg(swconn, &bfd_time); > >>>> ovs_mutex_unlock(&pinctrl_mutex); > >>>> } else { > >>>> lock_failed = true; > >>>> } > >>>> + send_mac_binding_buffered_pkts(swconn); > >>>> send_garp_rarp_run(swconn, &send_garp_rarp_time); > >>>> ip_mcast_querier_run(swconn, &send_mcast_query_time); > >>>> } > >>>> @@ -4226,11 +4221,6 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, > >>>> sbrec_port_binding_by_key, > >>>> sbrec_igmp_groups, > >>>> sbrec_ip_multicast_opts); > >>>> - run_buffered_binding(mac_binding_table, local_datapaths, > >>>> - sbrec_port_binding_by_key, > >>>> - sbrec_datapath_binding_by_key, > >>>> - sbrec_port_binding_by_name, > >>>> - sbrec_mac_binding_by_lport_ip); > >>>> sync_svc_monitors(ovnsb_idl_txn, svc_mon_table, > >>> sbrec_port_binding_by_name, > >>>> chassis); > >>>> bfd_monitor_run(ovnsb_idl_txn, bfd_table, > >>> sbrec_port_binding_by_name, > >>>> @@ -4241,6 +4231,12 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, > >>>> run_activated_ports(ovnsb_idl_txn, sbrec_datapath_binding_by_key, > >>>> sbrec_port_binding_by_key, chassis); > >>>> ovs_mutex_unlock(&pinctrl_mutex); > >>>> + > >>>> + run_buffered_binding(mac_binding_table, local_datapaths, > >>>> + sbrec_port_binding_by_key, > >>>> + sbrec_datapath_binding_by_key, > >>>> + sbrec_port_binding_by_name, > >>>> + sbrec_mac_binding_by_lport_ip); > >>>> } > >>>> > >>>> /* Table of ipv6_ra_state structures, keyed on logical port name. > >>>> @@ -4758,7 +4754,7 @@ pinctrl_destroy(void) > >>>> destroy_send_arps_nds(); > >>>> destroy_ipv6_ras(); > >>>> destroy_ipv6_prefixd(); > >>>> - destroy_buffered_packets_ctx(); > >>>> + destroy_buffered_packets_map(); > >>>> destroy_activated_ports(); > >>>> event_table_destroy(); > >>>> destroy_put_mac_bindings(); > >>>> @@ -4843,30 +4839,24 @@ pinctrl_handle_put_mac_binding(const struct flow > >>> *md, > >>>> notify_pinctrl_main(); > >>>> } > >>>> > >>>> -#define READY_PACKETS_VEC_CAPACITY_THRESHOLD 1024 > >>>> - > >>>> /* Called with in the pinctrl_handler thread context. */ > >>>> static void > >>>> send_mac_binding_buffered_pkts(struct rconn *swconn) > >>>> - OVS_REQUIRES(pinctrl_mutex) > >>>> { > >>>> enum ofp_version version = rconn_get_version(swconn); > >>>> enum ofputil_protocol proto = > >>> ofputil_protocol_from_ofp_version(version); > >>>> - struct vector *rpd = &buffered_packets_ctx.ready_packets_data; > >>>> + struct vector rpd = VECTOR_EMPTY_INITIALIZER(struct bp_packet_data); > >>>> + > >>>> + buffered_packets_run(&buffered_packets_map, &rpd); > >>>> > >>>> struct bp_packet_data *pd; > >>>> - VECTOR_FOR_EACH_PTR (rpd, pd) { > >>>> + VECTOR_FOR_EACH_PTR (&rpd, pd) { > >>>> queue_msg(swconn, ofputil_encode_resume(&pd->pin, > >>> pd->continuation, > >>>> proto)); > >>>> bp_packet_data_destroy(pd); > >>>> } > >>>> > >>>> - vector_clear(rpd); > >>>> - if (vector_capacity(rpd) >= READY_PACKETS_VEC_CAPACITY_THRESHOLD) { > >>>> - VLOG_DBG("The ready_packets_data vector capacity (%"PRIuSIZE") " > >>>> - "is over threshold.", vector_capacity(rpd)); > >>>> - vector_shrink_to_fit(rpd); > >>>> - } > >>>> + vector_destroy(&rpd); > >>>> } > >>>> > >>>> static void > >>>> @@ -4935,9 +4925,8 @@ run_buffered_binding(const struct > >>> sbrec_mac_binding_table *mac_binding_table, > >>>> struct ovsdb_idl_index > >>> *sbrec_datapath_binding_by_key, > >>>> struct ovsdb_idl_index *sbrec_port_binding_by_name, > >>>> struct ovsdb_idl_index > >>> *sbrec_mac_binding_by_lport_ip) > >>>> - OVS_REQUIRES(pinctrl_mutex) > >>>> { > >>>> - if (!buffered_packets_ctx_has_packets(&buffered_packets_ctx)) { > >>>> + if (cmap_is_empty(&buffered_packets_map)) { > >>>> return; > >>>> } > >>>> > >>>> @@ -4983,18 +4972,16 @@ run_buffered_binding(const struct > >>> sbrec_mac_binding_table *mac_binding_table, > >>>> mac_binding_add(&recent_mbs, mb_data, smb, 0); > >>>> } > >>>> > >>>> - buffered_packets_ctx_run(&buffered_packets_ctx, &recent_mbs, > >>>> - sbrec_port_binding_by_key, > >>>> - sbrec_datapath_binding_by_key, > >>>> - sbrec_port_binding_by_name, > >>>> - sbrec_mac_binding_by_lport_ip); > >>>> + if (buffered_packets_lookup_run(&buffered_packets_map, &recent_mbs, > >>>> + sbrec_port_binding_by_key, > >>>> + sbrec_datapath_binding_by_key, > >>>> + sbrec_port_binding_by_name, > >>>> + sbrec_mac_binding_by_lport_ip)) { > >>>> + notify_pinctrl_handler(); > >>>> + } > >>>> > >>>> mac_bindings_clear(&recent_mbs); > >>>> hmap_destroy(&recent_mbs); > >>>> - > >>>> - if (buffered_packets_ctx_is_ready_to_send(&buffered_packets_ctx)) { > >>>> - notify_pinctrl_handler(); > >>>> - } > >>>> } > >>>> > >>>> static void > >>>> @@ -6438,7 +6425,7 @@ may_inject_pkts(void) > >>>> !cmap_is_empty(&garp_rarp_get_data()->data) || > >>>> ipv6_prefixd_should_inject() || > >>>> !ovs_list_is_empty(&mcast_query_list) || > >>>> - > >>> buffered_packets_ctx_is_ready_to_send(&buffered_packets_ctx) || > >>>> + !cmap_is_empty(&buffered_packets_map) || > >>>> bfd_monitor_should_inject()); > >>>> } > >>>> > >>>> @@ -6528,9 +6515,7 @@ pinctrl_handle_nd_ns(struct rconn *swconn, const > >>> struct flow *ip_flow, > >>>> return; > >>>> } > >>>> > >>>> - ovs_mutex_lock(&pinctrl_mutex); > >>>> pinctrl_handle_buffered_packets(pin, continuation, false); > >>>> - ovs_mutex_unlock(&pinctrl_mutex); > >>>> > >>>> uint64_t packet_stub[128 / 8]; > >>>> struct dp_packet packet; > >>>> -- > >>>> 2.54.0 > >>>> > >>>> _______________________________________________ > >>>> dev mailing list > >>>> [email protected] > >>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev > >>>> > >>> > >> > >> Thank you Mark and Lorenzo, > >> applied to main. I was also considering backporting this to 26.03, are > >> there any concerns or objections? > >> > > Given that 26.03 is our new LTS, I'd also apply it there. > > Regards, > Dumitru
I pushed the change to 26.03 as well. > > >> Regards, > >> Ales > _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
