Previously, mlx5_shared_rxq_match() rejected any MTU mismatch between
a port and the shared RX queue, even when the queue hardware was already
running. This caused failures when hot-adding representors after runtime
MTU changes (new port has default MTU=1500, but running queue was created
with MTU=9000). It also prevented proper port reconfiguration scenarios.
The fix allows MTU mismatches when rxq_ctrl->obj != NULL (queue is
running), since runtime MTU changes via rte_eth_dev_set_mtu() only update
software bookkeeping without recreating hardware resources. Stopped queues
still enforce MTU matching to trigger proper reconfiguration.
This patch also removes the redundant priv->mtu field and uses
dev->data->mtu consistently for MTU tracking throughout the driver.
Fixes: 4414eb800708 ("net/mlx5: store MTU at Rx queue allocation time")
Fixes: 09c2555303be ("net/mlx5: support shared Rx queue")
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Shani Peretz <[email protected]>
Acked-by: Dariusz Sosnowski <[email protected]>
---
doc/guides/nics/mlx5.rst | 6 +++
drivers/net/mlx5/linux/mlx5_os.c | 6 +--
drivers/net/mlx5/mlx5.h | 1 -
drivers/net/mlx5/mlx5_ethdev.c | 3 --
drivers/net/mlx5/mlx5_rxq.c | 85 ++++++++++++++++++++++++------
drivers/net/mlx5/windows/mlx5_os.c | 6 +--
6 files changed, 78 insertions(+), 29 deletions(-)
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 904a0ac358..66323907a9 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -1827,6 +1827,12 @@ Limitations
#. Counters of received packets and bytes of queues in the same group and
queue ID are same.
+#. Each Rx queue in share group must have the same queue configuration.
+
+#. Ports in share group must have equal MTU at port start time.
+
+#. Reconfiguring a shared queue while it is in use (started or referenced by
flows) is not allowed.
+
.. _mlx5_rx_threshold:
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 7f73183bb1..e8c4fb62c8 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1334,7 +1334,6 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
priv->sh = sh;
priv->dev_port = spawn->phys_port;
priv->pci_dev = spawn->pci_dev;
- priv->mtu = RTE_ETHER_MTU;
/* Some internal functions rely on Netlink sockets, open them now. */
priv->nl_socket_rdma = nl_rdma;
priv->nl_socket_route = mlx5_nl_init(NETLINK_ROUTE, 0);
@@ -1606,14 +1605,13 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
}
#endif
/* Get actual MTU if possible. */
- err = mlx5_get_mtu(eth_dev, &priv->mtu);
+ err = mlx5_get_mtu(eth_dev, ð_dev->data->mtu);
if (err) {
err = rte_errno;
goto error;
}
- eth_dev->data->mtu = priv->mtu;
DRV_LOG(DEBUG, "port %u MTU is %u", eth_dev->data->port_id,
- priv->mtu);
+ eth_dev->data->mtu);
/* Initialize burst functions to prevent crashes before link-up. */
eth_dev->rx_pkt_burst = rte_eth_pkt_burst_dummy;
eth_dev->tx_pkt_burst = rte_eth_pkt_burst_dummy;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 966e802f5f..551f3b9d17 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1983,7 +1983,6 @@ struct mlx5_priv {
uint16_t vlan_filter[MLX5_MAX_VLAN_IDS]; /* VLAN filters table. */
unsigned int vlan_filter_n; /* Number of configured VLAN filters. */
/* Device properties. */
- uint16_t mtu; /* Configured MTU. */
uint16_t min_mtu; /* Minimum MTU allowed on the NIC. */
uint16_t max_mtu; /* Maximum MTU allowed on the NIC. */
unsigned int isolated:1; /* Whether isolated mode is enabled. */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index c93a7ac4f2..00a1d444ef 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -679,7 +679,6 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev,
size_t *no_of_elements)
int
mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
{
- struct mlx5_priv *priv = dev->data->dev_private;
uint16_t kern_mtu = 0;
int ret;
@@ -688,7 +687,6 @@ mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
return ret;
if (kern_mtu == mtu) {
- priv->mtu = mtu;
DRV_LOG(DEBUG, "port %u adapter MTU was already set to %u",
dev->data->port_id, mtu);
return 0;
@@ -702,7 +700,6 @@ mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
if (ret)
return ret;
if (kern_mtu == mtu) {
- priv->mtu = mtu;
DRV_LOG(DEBUG, "port %u adapter MTU set to %u",
dev->data->port_id, mtu);
return 0;
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 9210a92c5f..c4ba746d47 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -659,8 +659,6 @@ mlx5_rx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t
idx, uint16_t *desc,
struct mlx5_rxq_ctrl **rxq_ctrl)
{
struct mlx5_priv *priv = dev->data->dev_private;
- struct mlx5_rxq_priv *rxq;
- bool empty;
if (*desc > mlx5_dev_get_max_wq_size(priv->sh)) {
DRV_LOG(ERR,
@@ -699,14 +697,6 @@ mlx5_rx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t
idx, uint16_t *desc,
if ((*rxq_ctrl)->obj != NULL)
/* Some port using shared Rx queue has been started. */
return 0;
- /* Release all owner RxQ to reconfigure Shared RxQ. */
- do {
- rxq = LIST_FIRST(&(*rxq_ctrl)->owners);
- LIST_REMOVE(rxq, owner_entry);
- empty = LIST_EMPTY(&(*rxq_ctrl)->owners);
- mlx5_rxq_release(ETH_DEV(rxq->priv), rxq->idx);
- } while (!empty);
- *rxq_ctrl = NULL;
}
return 0;
}
@@ -780,10 +770,21 @@ mlx5_shared_rxq_match(struct mlx5_rxq_ctrl *rxq_ctrl,
struct rte_eth_dev *dev,
dev->data->port_id, idx);
return false;
}
- if (priv->mtu != rxq_ctrl->mtu) {
- DRV_LOG(ERR, "port %u queue index %u failed to join shared
group: mtu mismatch",
- dev->data->port_id, idx);
- return false;
+ if (dev->data->mtu != rxq_ctrl->mtu) {
+ /*
+ * MTU mismatch is only a problem when the queue hasn't been
started yet.
+ * If rxq_ctrl->obj is NULL, the queue hardware objects haven't
been created,
+ * meaning we're in the initial configuration phase where MTU
must match.
+ * If obj != NULL, the queue is already running with its
hardware configured,
+ * and runtime MTU changes are safe as they only update
software bookkeeping
+ * without recreating hardware resources.
+ */
+ if (rxq_ctrl->obj == NULL) {
+ DRV_LOG(DEBUG, "port %u queue index %u: mtu mismatch
with existing shared rxq_ctrl "
+ "(port mtu=%u rxq_ctrl mtu=%u),
reconfiguration needed",
+ dev->data->port_id, idx,
dev->data->mtu, rxq_ctrl->mtu);
+ return false;
+ }
}
if (priv->dev_data->dev_conf.intr_conf.rxq !=
spriv->dev_data->dev_conf.intr_conf.rxq) {
@@ -926,8 +927,57 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
uint16_t desc,
if (rxq_ctrl != NULL &&
!mlx5_shared_rxq_match(rxq_ctrl, dev, idx, desc, socket,
conf, mp)) {
- rte_errno = EINVAL;
- return -rte_errno;
+ struct mlx5_rxq_priv *rxq_tmp;
+ bool empty;
+
+ /*
+ * Configuration mismatch detected with existing shared
RXQ.
+ * We need to reconfigure, but only if it's safe to do
so.
+ *
+ * First check: If hardware objects are allocated, the
shared queue has
+ * been started. Reconfiguration would require
destroying and recreating
+ * hardware resources, which cannot be done while the
queue is active.
+ * Return EBUSY to force caller to stop the queue first.
+ */
+ if (rxq_ctrl->obj != NULL) {
+ DRV_LOG(ERR, "port %u queue index %u: cannot
reconfigure shared RXQ while started",
+ dev->data->port_id, idx);
+ rte_errno = EBUSY;
+ return -rte_errno;
+ }
+
+ /*
+ * Second check: Even if hardware objects aren't
allocated yet,
+ * verify that no owner port is actively using this
queue.
+ * refcnt == 1 means the queue exists but is idle (only
setup reference).
+ * refcnt > 1 means the queue is being used by flows or
other components.
+ * This prevents releasing a queue that other ports
depend on.
+ */
+ LIST_FOREACH(rxq_tmp, &rxq_ctrl->owners, owner_entry) {
+ if (rxq_tmp->refcnt > 1) {
+ DRV_LOG(ERR, "port %u queue index %u:
cannot reconfigure shared RXQ "
+ "while other ports are running",
+ dev->data->port_id, idx);
+ rte_errno = EBUSY;
+ return -rte_errno;
+ }
+ }
+
+ /*
+ * Safe to reconfigure: hardware not started and no
active users.
+ * Release all owner ports from the existing shared
rxq_ctrl.
+ * This will decrement references and eventually free
the old rxq_ctrl.
+ * Setting rxq_ctrl to NULL triggers creation of a new
one below with
+ * the updated configuration.
+ */
+ do {
+ rxq_tmp = LIST_FIRST(&rxq_ctrl->owners);
+ LIST_REMOVE(rxq_tmp, owner_entry);
+ empty = LIST_EMPTY(&rxq_ctrl->owners);
+ mlx5_rxq_release(ETH_DEV(rxq_tmp->priv),
rxq_tmp->idx);
+ } while (!empty);
+
+ rxq_ctrl = NULL;
}
} else {
res = mlx5_rx_queue_pre_setup(dev, idx, &desc, &rxq_ctrl);
@@ -1813,7 +1863,8 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx,
uint16_t desc,
LIST_INIT(&tmpl->owners);
MLX5_ASSERT(n_seg && n_seg <= MLX5_MAX_RXQ_NSEG);
/*
- * Save the original MTU to check against for shared rx queues.
+ * Save the current MTU to check against for shared rx queues.
+ * Use dev->data->mtu which reflects the actual current MTU.
*/
tmpl->mtu = dev->data->mtu;
/*
diff --git a/drivers/net/mlx5/windows/mlx5_os.c
b/drivers/net/mlx5/windows/mlx5_os.c
index 4eadc872a5..5dcabfbed2 100644
--- a/drivers/net/mlx5/windows/mlx5_os.c
+++ b/drivers/net/mlx5/windows/mlx5_os.c
@@ -396,7 +396,6 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
priv->sh = sh;
priv->dev_port = spawn->phys_port;
priv->pci_dev = spawn->pci_dev;
- priv->mtu = RTE_ETHER_MTU;
priv->mp_id.port_id = port_id;
strlcpy(priv->mp_id.name, MLX5_MP_NAME, RTE_MP_MAX_NAME_LEN);
priv->representor = !!switch_info->representor;
@@ -504,14 +503,13 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
}
#endif
/* Get actual MTU if possible. */
- err = mlx5_get_mtu(eth_dev, &priv->mtu);
+ err = mlx5_get_mtu(eth_dev, ð_dev->data->mtu);
if (err) {
err = rte_errno;
goto error;
}
- eth_dev->data->mtu = priv->mtu;
DRV_LOG(DEBUG, "port %u MTU is %u.", eth_dev->data->port_id,
- priv->mtu);
+ eth_dev->data->mtu);
/* Initialize burst functions to prevent crashes before link-up. */
eth_dev->rx_pkt_burst = rte_eth_pkt_burst_dummy;
eth_dev->tx_pkt_burst = rte_eth_pkt_burst_dummy;
--
2.43.0