On 1/18/24 14:16, David Marchand wrote:
> When OVS (main thread) configures a DPDK netdev, it holds a netdev_dpdk
> mutex lock.
> As part of this configure operation, the net/iavf driver (used with i40e
> VF devices) triggers a queue count change. The PF entity (serviced by a
> kernel PF driver for example) handles this change and requests back that
> the VF driver resets the VF device. The driver then completes the VF reset
> operation on its side and waits for completion of the iavf-event thread
> responsible for handling various VF device events.
>
> On the other hand, handling of the VF reset request in this iavf-event
> thread results in notifying the application with a port reset request
> (RTE_ETH_EVENT_INTR_RESET). The OVS reset callback tries to take a hold
> of the same netdev_dpdk mutex and blocks the iavf-event thread.
>
> As a resut, the net/iavf driver (still running on OVS main thread) is
> unable to complete as it is waiting for iavf-event to complete.
>
> To break from this situation, the OVS reset callback now won't take a
> netdev_dpdk mutex. Instead, the port reset request is stored in a simple
> RTE_ETH_MAXPORTS array associated to a seq object.
> This is enough to let the VF driver complete this port initialisation.
> The OVS main thread later handles the port reset request.
>
> Link: https://bugs.dpdk.org/show_bug.cgi?id=1337
> Signed-off-by: David Marchand <[email protected]>
> ---
> lib/netdev-dpdk.c | 73 +++++++++++++++++++++++++++++++++++++----------
> 1 file changed, 58 insertions(+), 15 deletions(-)
>
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index fb26825ff8..528850971a 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -58,6 +58,7 @@
> #include "openvswitch/match.h"
> #include "openvswitch/ofp-parse.h"
> #include "openvswitch/ofp-print.h"
> +#include "openvswitch/poll-loop.h"
> #include "openvswitch/shash.h"
> #include "openvswitch/vlog.h"
> #include "ovs-numa.h"
> @@ -2101,32 +2102,70 @@ netdev_dpdk_process_devargs(struct netdev_dpdk *dev,
> return new_port_id;
> }
>
> +static struct seq *netdev_dpdk_reset_seq;
> +static uint64_t netdev_dpdk_last_reset_seq;
> +static bool netdev_dpdk_pending_reset[RTE_MAX_ETHPORTS];
> +
> +static void
> +netdev_dpdk_wait(const struct netdev_class *netdev_class OVS_UNUSED)
> +{
> + uint64_t last_reset_seq = seq_read(netdev_dpdk_reset_seq);
> +
> + if (netdev_dpdk_last_reset_seq == last_reset_seq) {
> + seq_wait(netdev_dpdk_reset_seq, netdev_dpdk_last_reset_seq);
> + } else {
> + poll_immediate_wake();
> + }
> +}
> +
> +static void
> +netdev_dpdk_run(const struct netdev_class *netdev_class OVS_UNUSED)
> +{
> + uint64_t reset_seq = seq_read(netdev_dpdk_reset_seq);
> +
> + if (reset_seq != netdev_dpdk_last_reset_seq) {
> + dpdk_port_t port_id;
> +
> + netdev_dpdk_last_reset_seq = reset_seq;
> +
> + for (port_id = 0; port_id < RTE_MAX_ETHPORTS; port_id++) {
> + struct netdev_dpdk *dev;
> +
> + if (!netdev_dpdk_pending_reset[port_id]) {
> + continue;
> + }
> + netdev_dpdk_pending_reset[port_id] = false;
> +
> + ovs_mutex_lock(&dpdk_mutex);
> + dev = netdev_dpdk_lookup_by_port_id(port_id);
> + if (dev) {
> + ovs_mutex_lock(&dev->mutex);
> + dev->reset_needed = true;
> + netdev_request_reconfigure(&dev->up);
> + VLOG_DBG_RL(&rl, "%s: Device reset requested.",
> + netdev_get_name(&dev->up));
> + ovs_mutex_unlock(&dev->mutex);
> + }
> + ovs_mutex_unlock(&dpdk_mutex);
> + }
> + }
> +}
> +
> static int
> dpdk_eth_event_callback(dpdk_port_t port_id, enum rte_eth_event_type type,
> void *param OVS_UNUSED, void *ret_param OVS_UNUSED)
Can we rely on this callback to always be called from a main thread?
Otherwise, we should use atomics for an array or a separate lock.
> {
> - struct netdev_dpdk *dev;
> -
> switch ((int) type) {
> case RTE_ETH_EVENT_INTR_RESET:
> - ovs_mutex_lock(&dpdk_mutex);
> - dev = netdev_dpdk_lookup_by_port_id(port_id);
> - if (dev) {
> - ovs_mutex_lock(&dev->mutex);
> - dev->reset_needed = true;
> - netdev_request_reconfigure(&dev->up);
> - VLOG_DBG_RL(&rl, "%s: Device reset requested.",
> - netdev_get_name(&dev->up));
> - ovs_mutex_unlock(&dev->mutex);
> - }
> - ovs_mutex_unlock(&dpdk_mutex);
> + netdev_dpdk_pending_reset[port_id] = true;
> + seq_change(netdev_dpdk_reset_seq);
> break;
>
> default:
> /* Ignore all other types. */
> break;
> - }
> - return 0;
> + }
> + return 0;
> }
>
> static void
> @@ -5001,6 +5040,8 @@ netdev_dpdk_class_init(void)
> "[netdev]", 0, 1,
> netdev_dpdk_get_mempool_info, NULL);
>
> + netdev_dpdk_reset_seq = seq_create();
> + netdev_dpdk_last_reset_seq = seq_read(netdev_dpdk_reset_seq);
> ret = rte_eth_dev_callback_register(RTE_ETH_ALL,
> RTE_ETH_EVENT_INTR_RESET,
> dpdk_eth_event_callback, NULL);
> @@ -6593,6 +6634,8 @@ parse_vhost_config(const struct smap *ovs_other_config)
> #define NETDEV_DPDK_CLASS_BASE \
> NETDEV_DPDK_CLASS_COMMON, \
> .init = netdev_dpdk_class_init, \
> + .wait = netdev_dpdk_wait, \
> + .run = netdev_dpdk_run, \
> .destruct = netdev_dpdk_destruct, \
> .set_tx_multiq = netdev_dpdk_set_tx_multiq, \
> .get_carrier = netdev_dpdk_get_carrier, \
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev