During PF-initiated reset recovery, iavf_dev_close() sends an
extra 'VIRTCHNL_OP_RESET_VF' while recovery is already in progress.
That second reset can leave PF/VF virtchnl state inconsistent and
cause 'VIRTCHNL_OP_CONFIG_VSI_QUEUES' to fail with 'ERR_PARAM' after
ToR link flap/power-cycle, leaving the VF unable to recover.
This results in connection loss.
This patch introduces a new flag 'pf_reset_in_progress', which
is set only when iavf_handle_hw_reset() is entered for a
PF-initiated reset (vf_initiated_reset is false), and
it is cleared on exit.
The 'pf_reset_in_progress' flag is used to suppress the close-time
VF reset and the related close-time virtchnl messages to the AdminQ
while a PF-initiated reset recovery is in progress.
This is done to avoid duplicate VF reset requests while preserving
normal behavior for application-driven close or VF-initiated reinit.
Fixes: 675a104e2e94 ("net/iavf: fix abnormal disable HW interrupt")
Fixes: b34fe66ea893 ("net/iavf: delay VF reset command")
Fixes: 5e03e316c753 ("net/iavf: handle virtchnl event message without interrupt")
Cc: [email protected]
Signed-off-by: Anurag Mandal <[email protected]>
---
V6: Addressed Ciara Loftus's review comments
- changed to a concise release note
- removed unwarranted comment
- added proper comments in two places
- aligned commits with latest 'next-net-intel-for-next-net' branch
V5: Addressed Ciara Loftus's review comments
- added separate flag for PF initiated reset recovery
V4: Addressed Ciara Loftus's review comments
- split VF reset from other code changes
V3: Addressed latest ai-code-review comments
V2: Addressed ai-code-review comments
doc/guides/rel_notes/release_26_07.rst | 2 ++
drivers/net/intel/iavf/iavf.h | 1 +
drivers/net/intel/iavf/iavf_ethdev.c | 42 +++++++++++++++++---------
drivers/net/intel/iavf/iavf_vchnl.c | 18 +++++++++--
4 files changed, 46 insertions(+), 17 deletions(-)
diff --git a/doc/guides/rel_notes/release_26_07.rst
b/doc/guides/rel_notes/release_26_07.rst
index b5285af5fe..3832410363 100644
--- a/doc/guides/rel_notes/release_26_07.rst
+++ b/doc/guides/rel_notes/release_26_07.rst
@@ -121,6 +121,8 @@ New Features
* Added support for transmitting LLDP packets based on mbuf packet type.
* Implemented AVX2 context descriptor transmit paths.
+ * Fixed duplicate send of 'VIRTCHNL_OP_RESET_VF' during PF reset recovery
+ which could cause virtchnl state corruption.
* **Updated NVIDIA mlx5 ethernet driver.**
diff --git a/drivers/net/intel/iavf/iavf.h b/drivers/net/intel/iavf/iavf.h
index 4444602a30..293adaf6c9 100644
--- a/drivers/net/intel/iavf/iavf.h
+++ b/drivers/net/intel/iavf/iavf.h
@@ -292,6 +292,7 @@ struct iavf_info {
bool in_reset_recovery;
bool reset_pending;
+ bool pf_reset_in_progress;
uint32_t ptp_caps;
rte_spinlock_t phc_time_aq_lock;
diff --git a/drivers/net/intel/iavf/iavf_ethdev.c
b/drivers/net/intel/iavf/iavf_ethdev.c
index ec1ad02826..4c8a1895e4 100644
--- a/drivers/net/intel/iavf/iavf_ethdev.c
+++ b/drivers/net/intel/iavf/iavf_ethdev.c
@@ -3168,22 +3168,29 @@ iavf_dev_close(struct rte_eth_dev *dev)
ret = iavf_dev_stop(dev);
/*
- * Release redundant queue resource when close the dev
- * so that other vfs can re-use the queues.
+ * Prevent sending close-time virtchnl messages to the AdminQ
+ * during PF-initiated reset recovery.
*/
- if (vf->lv_enabled) {
- ret = iavf_request_queues(dev, IAVF_MAX_NUM_QUEUES_DFLT);
- if (ret)
- PMD_DRV_LOG(ERR, "Reset the num of queues failed");
+ if (!vf->pf_reset_in_progress) {
- vf->max_rss_qregion = IAVF_MAX_NUM_QUEUES_DFLT;
- }
+ /*
+ * Release redundant queue resource when close the dev
+ * so that other vfs can re-use the queues.
+ */
+ if (vf->lv_enabled) {
+ ret = iavf_request_queues(dev,
IAVF_MAX_NUM_QUEUES_DFLT);
+ if (ret)
+ PMD_DRV_LOG(ERR, "Reset the num of queues
failed");
+ vf->max_rss_qregion = IAVF_MAX_NUM_QUEUES_DFLT;
+ }
- /* Disable promiscuous mode before resetting the VF. This is to avoid
- * potential issues when the PF is bound to the kernel driver.
- */
- if (vf->promisc_unicast_enabled || vf->promisc_multicast_enabled)
- iavf_config_promisc(adapter, false, false);
+ /*
+ * Disable promiscuous mode before resetting the VF. This is to
avoid
+ * potential issues when the PF is bound to the kernel driver.
+ */
+ if (vf->promisc_unicast_enabled ||
vf->promisc_multicast_enabled)
+ iavf_config_promisc(adapter, false, false);
+ }
adapter->closed = true;
@@ -3196,7 +3203,12 @@ iavf_dev_close(struct rte_eth_dev *dev)
iavf_flow_flush(dev, NULL);
iavf_flow_uninit(adapter);
- iavf_vf_reset(hw);
+ /*
+ * Prevent sending VIRTCHNL_OP_RESET_VF during PF-initiated
+ * reset recovery.
+ */
+ if (!vf->pf_reset_in_progress)
+ iavf_vf_reset(hw);
/*
* If a reset is pending, wait for the PF to disable the VF's admin
* receive queue (its first reset action) before we shut it down
@@ -3380,6 +3392,7 @@ iavf_handle_hw_reset(struct rte_eth_dev *dev, bool
vf_initiated_reset)
}
vf->in_reset_recovery = true;
+ vf->pf_reset_in_progress = !vf_initiated_reset;
iavf_set_no_poll(adapter, false);
/* Call the pre reset callback */
@@ -3430,6 +3443,7 @@ iavf_handle_hw_reset(struct rte_eth_dev *dev, bool
vf_initiated_reset)
vf->post_reset_cb(dev->data->port_id, ret,
vf->post_reset_cb_arg);
vf->in_reset_recovery = false;
+ vf->pf_reset_in_progress = false;
iavf_set_no_poll(adapter, false);
return;
diff --git a/drivers/net/intel/iavf/iavf_vchnl.c
b/drivers/net/intel/iavf/iavf_vchnl.c
index 0643a835d5..08ab11ccf1 100644
--- a/drivers/net/intel/iavf/iavf_vchnl.c
+++ b/drivers/net/intel/iavf/iavf_vchnl.c
@@ -283,9 +283,21 @@ iavf_read_msg_from_pf(struct iavf_adapter *adapter,
uint16_t buf_len,
vf->link_up ? "up" : "down");
break;
case VIRTCHNL_EVENT_RESET_IMPENDING:
- vf->vf_reset = true;
- iavf_set_no_poll(adapter, false);
- PMD_DRV_LOG(INFO, "VF is resetting");
+ /*
+ * Force link down on impending reset to drop
+ * the cached link-up state; a fresh LSC up
+ * event will be re-issued by the PF once the
+ * VF is reinitialised.
+ */
+ vf->link_up = false;
+ if (!vf->vf_reset) {
+ vf->vf_reset = true;
+ iavf_set_no_poll(adapter, false);
+ iavf_dev_event_post(vf->eth_dev,
+ RTE_ETH_EVENT_INTR_RESET,
+ NULL, 0);
+ }
+ PMD_DRV_LOG(DEBUG, "VF is resetting");
break;
case VIRTCHNL_EVENT_PF_DRIVER_CLOSE:
vf->dev_closed = true;
--
2.34.1