With dedicated RDMA link monitor, there are two threads which can update the IB device port information.
Add a new flag to avoid the race condition. Update should go through RDMA link monitor once ready. Current logic is: 1. Update all port information in probing thread. 2. Probe thread initiates the dedicated rdma monitor thread. Once ready, port information update will be handled by this thread. 3. Next probing won't trigger PMD port information update. No lock is required. Fixes: 51fb5c40c826 ("common/mlx5: optimize device probing") Cc: rongw...@nvidia.com Cc: sta...@dpdk.org Signed-off-by: Rongwei Liu <rongw...@nvidia.com> Acked-by: Viacheslav Ovsiienko <viachesl...@nvidia.com> --- drivers/common/mlx5/linux/mlx5_nl.c | 7 ++- drivers/common/mlx5/mlx5_common.h | 1 + drivers/net/mlx5/linux/mlx5_ethdev_os.c | 69 ++++--------------------- drivers/net/mlx5/linux/mlx5_os.c | 9 +++- 4 files changed, 25 insertions(+), 61 deletions(-) diff --git a/drivers/common/mlx5/linux/mlx5_nl.c b/drivers/common/mlx5/linux/mlx5_nl.c index dd69e229e3..84c12efdc7 100644 --- a/drivers/common/mlx5/linux/mlx5_nl.c +++ b/drivers/common/mlx5/linux/mlx5_nl.c @@ -1171,8 +1171,12 @@ mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex, struct mlx5_dev_info data.ibindex = dev_info->ibindex; } + /* Update should be done via monitor thread to avoid race condition */ + if (dev_info->async_mon_ready) { + rte_errno = ENODEV; + return 0; + } ret = mlx5_nl_port_info(nl, pindex, &data); - if (dev_info->probe_opt && !strcmp(dev_info->ibname, name)) { if ((!ret || ret == -ENODEV) && dev_info->port_info && pindex <= dev_info->port_num) { @@ -1182,7 +1186,6 @@ mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex, struct mlx5_dev_info dev_info->port_info[pindex].valid = 1; } } - return ret ? 0 : data.ifindex; } diff --git a/drivers/common/mlx5/mlx5_common.h b/drivers/common/mlx5/mlx5_common.h index bea1382911..b49f0c850e 100644 --- a/drivers/common/mlx5/mlx5_common.h +++ b/drivers/common/mlx5/mlx5_common.h @@ -185,6 +185,7 @@ struct mlx5_dev_info { uint32_t ibindex; char ibname[MLX5_FS_NAME_MAX]; uint8_t probe_opt; + uint8_t async_mon_ready; struct mlx5_port_nl_info *port_info; }; diff --git a/drivers/net/mlx5/linux/mlx5_ethdev_os.c b/drivers/net/mlx5/linux/mlx5_ethdev_os.c index a371c2c747..180fd60f3a 100644 --- a/drivers/net/mlx5/linux/mlx5_ethdev_os.c +++ b/drivers/net/mlx5/linux/mlx5_ethdev_os.c @@ -704,59 +704,6 @@ mlx5_link_update_bond(struct rte_eth_dev *dev) ((ifr.ifr_flags & IFF_UP) && (ifr.ifr_flags & IFF_RUNNING)); } -static void -mlx5_handle_port_info_update(struct mlx5_dev_info *dev_info, uint32_t if_index, - uint16_t msg_type) -{ - struct mlx5_switch_info info = { - .master = 0, - .representor = 0, - .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET, - .port_name = 0, - .switch_id = 0, - }; - uint32_t i; - int nl_route; - - if (dev_info->port_num <= 1 || dev_info->port_info == NULL) - return; - - DRV_LOG(DEBUG, "IB device %s ifindex %u received netlink event %u", - dev_info->ibname, if_index, msg_type); - for (i = 1; i <= dev_info->port_num; i++) { - if (!dev_info->port_info[i].valid) - continue; - if (dev_info->port_info[i].ifindex == if_index) - break; - } - if (msg_type == RTM_NEWLINK && i > dev_info->port_num) { - nl_route = mlx5_nl_init(NETLINK_ROUTE, 0); - if (nl_route < 0) - goto flush_all; - - if (mlx5_nl_switch_info(nl_route, if_index, &info)) { - if (mlx5_sysfs_switch_info(if_index, &info)) - goto flush_all; - } - - if (info.name_type == MLX5_PHYS_PORT_NAME_TYPE_PFSF || - info.name_type == MLX5_PHYS_PORT_NAME_TYPE_PFVF) - goto flush_all; - close(nl_route); - } else if (msg_type == RTM_DELLINK && i <= dev_info->port_num) { - memset(dev_info->port_info + i, 0, sizeof(struct mlx5_port_nl_info)); - } - - return; -flush_all: - if (nl_route >= 0) - close(nl_route); - for (i = 1; i <= dev_info->port_num; i++) { - if (!dev_info->port_info[i].ifindex) - dev_info->port_info[i].valid = 0; - } -} - static void mlx5_dev_interrupt_nl_cb(struct nlmsghdr *hdr, void *cb_arg) { @@ -766,8 +713,6 @@ mlx5_dev_interrupt_nl_cb(struct nlmsghdr *hdr, void *cb_arg) if (mlx5_nl_parse_link_status_update(hdr, &if_index) < 0) return; - if (sh->cdev->config.probe_opt && sh->cdev->dev_info.port_num > 1 && !sh->rdma_monitor_supp) - mlx5_handle_port_info_update(&sh->cdev->dev_info, if_index, hdr->nlmsg_type); for (i = 0; i < sh->max_port; i++) { struct mlx5_dev_shared_port *port = &sh->port[i]; @@ -970,10 +915,18 @@ mlx5_dev_interrupt_handler_ib(void *arg) return; if (data.event_type == MLX5_NL_RDMA_NETDEV_ATTACH_EVENT && - !(data.flags & MLX5_NL_CMD_GET_NET_INDEX)) + !(data.flags & MLX5_NL_CMD_GET_NET_INDEX)) { + DRV_LOG(WARNING, "Incomplete RDMA ATTACH event for ibdev[%d]", + dev_info->ibindex); + if (data.flags & MLX5_NL_CMD_GET_PORT_INDEX) + memset(dev_info->port_info + data.portnum, 0, + sizeof(struct mlx5_port_nl_info)); + else + goto flush_all; return; + } - DRV_LOG(DEBUG, "Event info: type %d, ibindex %d, ifindex %d, portnum %d,", + DRV_LOG(INFO, "Event info: type %d, ibindex %d, ifindex %d, portnum %d,", data.event_type, data.ibindex, data.ifindex, data.portnum); /* Changes found in number of SF/VF ports. All information is likely unreliable. */ @@ -992,7 +945,7 @@ mlx5_dev_interrupt_handler_ib(void *arg) goto flush_all; } } else if (data.event_type == MLX5_NL_RDMA_NETDEV_DETACH_EVENT) { - memset(dev_info->port_info + data.portnum, 0, sizeof(struct mlx5_port_nl_info)); + dev_info->port_info[data.portnum].ifindex = 0; } return; diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index 85b3fabaf5..edfe61ea55 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -3051,7 +3051,7 @@ mlx5_os_dev_shared_handler_install(struct mlx5_dev_ctx_shared *sh) DRV_LOG(ERR, "Failed to allocate intr_handle."); return; } - if (sh->cdev->config.probe_opt && + if (sh->cdev->dev_info.probe_opt && sh->cdev->dev_info.port_num > 1 && !sh->rdma_monitor_supp) { nlsk_fd = mlx5_nl_rdma_monitor_init(); @@ -3076,8 +3076,15 @@ mlx5_os_dev_shared_handler_install(struct mlx5_dev_ctx_shared *sh) close(nlsk_fd); return; } + sh->cdev->dev_info.async_mon_ready = 1; } else { close(nlsk_fd); + if (sh->cdev->dev_info.probe_opt) { + DRV_LOG(INFO, "Failed to create rdma link monitor, disable probe optimization"); + sh->cdev->dev_info.probe_opt = 0; + mlx5_free(sh->cdev->dev_info.port_info); + sh->cdev->dev_info.port_info = NULL; + } } } nlsk_fd = mlx5_nl_init(NETLINK_ROUTE, RTMGRP_LINK); -- 2.27.0