Add support for changing the number of RX/TX queues at runtime via the port stop/configure/start sequence. When the queue count changes, perform a full NVS/RNDIS teardown and reinit to allocate fresh VMBus subchannels matching the new queue count, then reconfigure the RSS indirection table accordingly.
Key changes: - hn_dev_configure: detect queue count changes and perform full NVS session reinit with subchannel teardown/recreation - hn_dev_stop: drain pending TX completions (up to 1s) to prevent stale completions from corrupting queue state after reconfig - hn_vf_tx/rx_queue_release: use write lock when nulling VF queue pointers to prevent use-after-free with concurrent fast-path readers Signed-off-by: Long Li <[email protected]> --- v2: - Fix reinit_failed recovery: re-map device before chan_open when device is unmapped - Move hn_rndis_conf_offload() to after reinit block - Use write lock in hn_vf_tx/rx_queue_release() - Reset RSS indirection table in subchan_cleanup error path - Fix multi-line comment style drivers/net/netvsc/hn_ethdev.c | 171 +++++++++++++++++++++++++++++++-- drivers/net/netvsc/hn_vf.c | 16 +-- 2 files changed, 171 insertions(+), 16 deletions(-) diff --git a/drivers/net/netvsc/hn_ethdev.c b/drivers/net/netvsc/hn_ethdev.c index 798b4c9023..e0885b74b7 100644 --- a/drivers/net/netvsc/hn_ethdev.c +++ b/drivers/net/netvsc/hn_ethdev.c @@ -745,6 +745,9 @@ netvsc_hotadd_callback(const char *device_name, enum rte_dev_event_type type, } } +static void hn_detach(struct hn_data *hv); +static int hn_attach(struct hn_data *hv, unsigned int mtu); + static int hn_dev_configure(struct rte_eth_dev *dev) { struct rte_eth_conf *dev_conf = &dev->data->dev_conf; @@ -754,6 +757,8 @@ static int hn_dev_configure(struct rte_eth_dev *dev) struct hn_data *hv = dev->data->dev_private; uint64_t unsupported; int i, err, subchan; + uint32_t old_subchans = 0; + bool device_unmapped = false; PMD_INIT_FUNC_TRACE(); @@ -778,36 +783,95 @@ static int hn_dev_configure(struct rte_eth_dev *dev) hv->vlan_strip = !!(rxmode->offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP); - err = hn_rndis_conf_offload(hv, txmode->offloads, - rxmode->offloads); - if (err) { - PMD_DRV_LOG(NOTICE, - "offload configure failed"); - return err; - } + /* If queue count unchanged, skip subchannel teardown/reinit */ 
+ if (RTE_MAX(dev->data->nb_rx_queues, + dev->data->nb_tx_queues) == hv->num_queues) + goto skip_reinit; hv->num_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues); + /* Close all existing subchannels */ + for (i = 1; i < HN_MAX_CHANNELS; i++) { + if (hv->channels[i] != NULL) { + rte_vmbus_chan_close(hv->channels[i]); + hv->channels[i] = NULL; + old_subchans++; + } + } + + /* + * If subchannels existed, do a full NVS/RNDIS teardown + * and vmbus re-init to ensure a clean NVS session. + * Cannot re-send NVS subchannel request on the same + * session without invalidating the data path. + */ + if (old_subchans > 0) { + PMD_DRV_LOG(NOTICE, + "reinit NVS (had %u subchannels)", + old_subchans); + + hn_detach(hv); + + rte_vmbus_chan_close(hv->channels[0]); + rte_free(hv->channels[0]); + hv->channels[0] = NULL; + + rte_vmbus_unmap_device(hv->vmbus); + device_unmapped = true; + err = rte_vmbus_map_device(hv->vmbus); + if (err) { + PMD_DRV_LOG(ERR, + "Could not re-map vmbus device!"); + goto reinit_failed; + } + device_unmapped = false; + + hv->rxbuf_res = hv->vmbus->resource[HV_RECV_BUF_MAP]; + hv->chim_res = hv->vmbus->resource[HV_SEND_BUF_MAP]; + + err = rte_vmbus_chan_open(hv->vmbus, &hv->channels[0]); + if (err) { + PMD_DRV_LOG(ERR, + "Could not re-open vmbus channel!"); + goto reinit_failed; + } + + rte_vmbus_set_latency(hv->vmbus, hv->channels[0], + hv->latency); + + err = hn_attach(hv, dev->data->mtu); + if (err) { + rte_vmbus_chan_close(hv->channels[0]); + rte_free(hv->channels[0]); + hv->channels[0] = NULL; + PMD_DRV_LOG(ERR, + "NVS reinit failed: %d", err); + goto reinit_failed; + } + } + for (i = 0; i < NDIS_HASH_INDCNT; i++) hv->rss_ind[i] = i % dev->data->nb_rx_queues; hn_rss_hash_init(hv, rss_conf); subchan = hv->num_queues - 1; + + /* Allocate fresh subchannels and configure RSS */ if (subchan > 0) { err = hn_subchan_configure(hv, subchan); if (err) { PMD_DRV_LOG(NOTICE, "subchannel configuration failed"); - return err; + goto subchan_cleanup; 
} err = hn_rndis_conf_rss(hv, NDIS_RSS_FLAG_DISABLE); if (err) { PMD_DRV_LOG(NOTICE, "rss disable failed"); - return err; + goto subchan_cleanup; } if (rss_conf->rss_hf != 0) { @@ -815,12 +879,75 @@ static int hn_dev_configure(struct rte_eth_dev *dev) if (err) { PMD_DRV_LOG(NOTICE, "initial RSS config failed"); - return err; + goto subchan_cleanup; } } } +skip_reinit: + /* Apply offload config after reinit so it targets the final RNDIS session */ + err = hn_rndis_conf_offload(hv, txmode->offloads, + rxmode->offloads); + if (err) { + PMD_DRV_LOG(NOTICE, + "offload configure failed"); + return err; + } + return hn_vf_configure_locked(dev, dev_conf); + +subchan_cleanup: + for (i = 1; i < HN_MAX_CHANNELS; i++) { + if (hv->channels[i] != NULL) { + rte_vmbus_chan_close(hv->channels[i]); + hv->channels[i] = NULL; + } + } + hv->num_queues = 1; + for (i = 0; i < NDIS_HASH_INDCNT; i++) + hv->rss_ind[i] = 0; + return err; + +reinit_failed: + /* + * Device is in a broken state after failed reinit. + * Try to re-establish minimal connectivity. 
+ */ + PMD_DRV_LOG(ERR, + "reinit failed (err %d), attempting recovery", err); + if (hv->channels[0] == NULL) { + if (device_unmapped) { + if (rte_vmbus_map_device(hv->vmbus)) { + hv->num_queues = 0; + PMD_DRV_LOG(ERR, + "recovery failed, could not re-map device"); + return err; + } + hv->rxbuf_res = hv->vmbus->resource[HV_RECV_BUF_MAP]; + hv->chim_res = hv->vmbus->resource[HV_SEND_BUF_MAP]; + } + if (rte_vmbus_chan_open(hv->vmbus, &hv->channels[0]) == 0) { + if (hn_attach(hv, dev->data->mtu) == 0) { + hv->num_queues = 1; + PMD_DRV_LOG(NOTICE, + "recovery successful on primary channel"); + } else { + rte_vmbus_chan_close(hv->channels[0]); + rte_free(hv->channels[0]); + hv->channels[0] = NULL; + hv->num_queues = 0; + PMD_DRV_LOG(ERR, + "recovery failed, device unusable"); + } + } else { + hv->num_queues = 0; + PMD_DRV_LOG(ERR, + "recovery failed, device unusable"); + } + } else { + hv->num_queues = 1; + } + return err; } static int hn_dev_stats_get(struct rte_eth_dev *dev, @@ -1067,6 +1194,7 @@ hn_dev_stop(struct rte_eth_dev *dev) { struct hn_data *hv = dev->data->dev_private; int i, ret; + unsigned int retry; PMD_INIT_FUNC_TRACE(); dev->data->dev_started = 0; @@ -1075,6 +1203,29 @@ hn_dev_stop(struct rte_eth_dev *dev) hn_rndis_set_rxfilter(hv, 0); ret = hn_vf_stop(dev); + /* + * Drain pending TX completions to prevent stale completions + * from corrupting queue state after port reconfiguration. 
+ */ + for (retry = 0; retry < 100; retry++) { + uint32_t pending = 0; + + for (i = 0; i < hv->num_queues; i++) { + struct hn_tx_queue *txq = dev->data->tx_queues[i]; + + if (txq == NULL) + continue; + hn_process_events(hv, i, 0); + pending += rte_mempool_in_use_count(txq->txdesc_pool); + } + if (pending == 0) + break; + rte_delay_ms(10); + } + if (retry >= 100) + PMD_DRV_LOG(WARNING, + "Failed to drain all TX completions"); + for (i = 0; i < hv->num_queues; i++) { dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED; dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED; diff --git a/drivers/net/netvsc/hn_vf.c b/drivers/net/netvsc/hn_vf.c index 0ecfaf54ea..e77232bfb3 100644 --- a/drivers/net/netvsc/hn_vf.c +++ b/drivers/net/netvsc/hn_vf.c @@ -637,12 +637,14 @@ void hn_vf_tx_queue_release(struct hn_data *hv, uint16_t queue_id) { struct rte_eth_dev *vf_dev; - rte_rwlock_read_lock(&hv->vf_lock); + rte_rwlock_write_lock(&hv->vf_lock); vf_dev = hn_get_vf_dev(hv); - if (vf_dev && vf_dev->dev_ops->tx_queue_release) + if (vf_dev && vf_dev->dev_ops->tx_queue_release) { (*vf_dev->dev_ops->tx_queue_release)(vf_dev, queue_id); + vf_dev->data->tx_queues[queue_id] = NULL; + } - rte_rwlock_read_unlock(&hv->vf_lock); + rte_rwlock_write_unlock(&hv->vf_lock); } int hn_vf_rx_queue_setup(struct rte_eth_dev *dev, @@ -669,11 +671,13 @@ void hn_vf_rx_queue_release(struct hn_data *hv, uint16_t queue_id) { struct rte_eth_dev *vf_dev; - rte_rwlock_read_lock(&hv->vf_lock); + rte_rwlock_write_lock(&hv->vf_lock); vf_dev = hn_get_vf_dev(hv); - if (vf_dev && vf_dev->dev_ops->rx_queue_release) + if (vf_dev && vf_dev->dev_ops->rx_queue_release) { (*vf_dev->dev_ops->rx_queue_release)(vf_dev, queue_id); - rte_rwlock_read_unlock(&hv->vf_lock); + vf_dev->data->rx_queues[queue_id] = NULL; + } + rte_rwlock_write_unlock(&hv->vf_lock); } int hn_vf_stats_get(struct rte_eth_dev *dev, -- 2.43.0

