From: Long Li <lon...@microsoft.com> The driver uses mana_shared_data for tracking usage count for primary process. This is not correct as the mana_shared_data is allocated by the primary and is meant to track usage of secondary process by the primary process. And it creates a race condition when the device is removed because the counter is no longer available if this shared memory is being freed.
Move the usage count tracking to mana_local_data and fix the race condition in mana_pci_remove(). Fixes: 517ed6e2d590 ("net/mana: add basic driver with build environment") Cc: sta...@dpdk.org Signed-off-by: Long Li <lon...@microsoft.com> --- drivers/net/mana/mana.c | 76 ++++++++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 28 deletions(-) diff --git a/drivers/net/mana/mana.c b/drivers/net/mana/mana.c index c37c4e3444..da4a54144f 100644 --- a/drivers/net/mana/mana.c +++ b/drivers/net/mana/mana.c @@ -1167,8 +1167,12 @@ mana_init_shared_data(void) rte_spinlock_lock(&mana_shared_data_lock); /* Skip if shared data is already initialized */ - if (mana_shared_data) + if (mana_shared_data) { + DRV_LOG(INFO, "shared data is already initialized"); goto exit; + } + + memset(&mana_local_data, 0, sizeof(mana_local_data)); if (rte_eal_process_type() == RTE_PROC_PRIMARY) { mana_shared_mz = rte_memzone_reserve(MZ_MANA_SHARED_DATA, @@ -1192,7 +1196,6 @@ mana_init_shared_data(void) } mana_shared_data = secondary_mz->addr; - memset(&mana_local_data, 0, sizeof(mana_local_data)); } exit: @@ -1213,11 +1216,11 @@ mana_init_once(void) if (ret) return ret; - rte_spinlock_lock(&mana_shared_data->lock); + rte_spinlock_lock(&mana_shared_data_lock); switch (rte_eal_process_type()) { case RTE_PROC_PRIMARY: - if (mana_shared_data->init_done) + if (mana_local_data.init_done) break; ret = mana_mp_init_primary(); @@ -1225,7 +1228,7 @@ mana_init_once(void) break; DRV_LOG(ERR, "MP INIT PRIMARY"); - mana_shared_data->init_done = 1; + mana_local_data.init_done = 1; break; case RTE_PROC_SECONDARY: @@ -1248,7 +1251,7 @@ mana_init_once(void) break; } - rte_spinlock_unlock(&mana_shared_data->lock); + rte_spinlock_unlock(&mana_shared_data_lock); return ret; } @@ -1319,11 +1322,6 @@ mana_probe_port(struct ibv_device *ibdev, struct ibv_device_attr_ex *dev_attr, eth_dev->tx_pkt_burst = mana_tx_burst; eth_dev->rx_pkt_burst = mana_rx_burst; - rte_spinlock_lock(&mana_shared_data->lock); - mana_shared_data->secondary_cnt++; - mana_local_data.secondary_cnt++; - rte_spinlock_unlock(&mana_shared_data->lock); - rte_eth_copy_pci_info(eth_dev, pci_dev); rte_eth_dev_probing_finish(eth_dev); @@ -1406,10 +1404,6 @@ mana_probe_port(struct ibv_device *ibdev, struct ibv_device_attr_ex *dev_attr, goto failed; } - rte_spinlock_lock(&mana_shared_data->lock); - mana_shared_data->primary_cnt++; - rte_spinlock_unlock(&mana_shared_data->lock); - eth_dev->device = &pci_dev->device; DRV_LOG(INFO, "device %s at port %u", name, eth_dev->data->port_id); @@ -1552,13 +1546,42 @@ mana_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, count = mana_pci_probe_mac(pci_dev, NULL); } + /* If no device is found, clean up resources if this is the last one */ if (!count) { - rte_memzone_free(mana_shared_mz); - mana_shared_mz = NULL; - ret = -ENODEV; + rte_spinlock_lock(&mana_shared_data_lock); + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + if (!mana_local_data.primary_cnt) { + mana_mp_uninit_primary(); + rte_memzone_free(mana_shared_mz); + mana_shared_mz = NULL; + mana_shared_data = NULL; + } + } else { + if (!mana_local_data.secondary_cnt) { + mana_mp_uninit_secondary(); + mana_shared_data = NULL; + } + } + rte_spinlock_unlock(&mana_shared_data_lock); + return -ENODEV; } - return ret; + /* At least one eth_dev is probed, init shared data */ + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + rte_spinlock_lock(&mana_shared_data_lock); + mana_local_data.primary_cnt++; + rte_spinlock_unlock(&mana_shared_data_lock); + } else { + rte_spinlock_lock(&mana_shared_data_lock); + mana_local_data.secondary_cnt++; + rte_spinlock_unlock(&mana_shared_data_lock); + + rte_spinlock_lock(&mana_shared_data->lock); + mana_shared_data->secondary_cnt++; + rte_spinlock_unlock(&mana_shared_data->lock); + } + + return 0; } static int @@ -1576,22 +1599,18 @@ mana_pci_remove(struct rte_pci_device *pci_dev) if (rte_eal_process_type() == RTE_PROC_PRIMARY) { rte_spinlock_lock(&mana_shared_data_lock); - rte_spinlock_lock(&mana_shared_data->lock); + RTE_VERIFY(mana_local_data.primary_cnt > 0); + mana_local_data.primary_cnt--; - RTE_VERIFY(mana_shared_data->primary_cnt > 0); - mana_shared_data->primary_cnt--; - if (!mana_shared_data->primary_cnt) { + if (!mana_local_data.primary_cnt) { DRV_LOG(DEBUG, "mp uninit primary"); mana_mp_uninit_primary(); - } - - rte_spinlock_unlock(&mana_shared_data->lock); - /* Also free the shared memory if this is the last */ - if (!mana_shared_data->primary_cnt) { + /* Also free the shared memory if this is the last */ DRV_LOG(DEBUG, "free shared memezone data"); rte_memzone_free(mana_shared_mz); mana_shared_mz = NULL; + mana_shared_data = NULL; } rte_spinlock_unlock(&mana_shared_data_lock); @@ -1608,6 +1627,7 @@ mana_pci_remove(struct rte_pci_device *pci_dev) if (!mana_local_data.secondary_cnt) { DRV_LOG(DEBUG, "mp uninit secondary"); mana_mp_uninit_secondary(); + mana_shared_data = NULL; } rte_spinlock_unlock(&mana_shared_data_lock); -- 2.34.1