Previously, mlx5_shared_rxq_match() rejected any MTU mismatch between
a port and the shared RX queue, even when the queue hardware was already
running. This caused failures when hot-adding representors after runtime
MTU changes (new port has default MTU=1500, but running queue was created
with MTU=9000). It also prevented proper port reconfiguration scenarios.

The fix allows MTU mismatches when rxq_ctrl->obj != NULL (queue is
running), since runtime MTU changes via rte_eth_dev_set_mtu() only update
software bookkeeping without recreating hardware resources. Stopped queues
still enforce MTU matching to trigger proper reconfiguration.

This patch also removes the redundant priv->mtu field and uses
dev->data->mtu consistently for MTU tracking throughout the driver.

Fixes: 4414eb800708 ("net/mlx5: store MTU at Rx queue allocation time")
Fixes: 09c2555303be ("net/mlx5: support shared Rx queue")
Cc: [email protected]
Cc: [email protected]

Signed-off-by: Shani Peretz <[email protected]>
Acked-by: Dariusz Sosnowski <[email protected]>
---
 doc/guides/nics/mlx5.rst           |  6 +++
 drivers/net/mlx5/linux/mlx5_os.c   |  6 +--
 drivers/net/mlx5/mlx5.h            |  1 -
 drivers/net/mlx5/mlx5_ethdev.c     |  3 --
 drivers/net/mlx5/mlx5_rxq.c        | 85 ++++++++++++++++++++++++------
 drivers/net/mlx5/windows/mlx5_os.c |  6 +--
 6 files changed, 78 insertions(+), 29 deletions(-)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 904a0ac358..66323907a9 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -1827,6 +1827,12 @@ Limitations
 
 #. Counters of received packets and bytes of queues in the same group and 
queue ID are same.
 
+#. Each Rx queue in share group must have the same queue configuration.
+
+#. Ports in share group must have equal MTU at port start time.
+
+#. Reconfiguring a shared queue while it is in use (started or referenced by 
flows) is not allowed.
+
 
 .. _mlx5_rx_threshold:
 
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 7f73183bb1..e8c4fb62c8 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1334,7 +1334,6 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
        priv->sh = sh;
        priv->dev_port = spawn->phys_port;
        priv->pci_dev = spawn->pci_dev;
-       priv->mtu = RTE_ETHER_MTU;
        /* Some internal functions rely on Netlink sockets, open them now. */
        priv->nl_socket_rdma = nl_rdma;
        priv->nl_socket_route = mlx5_nl_init(NETLINK_ROUTE, 0);
@@ -1606,14 +1605,13 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
        }
 #endif
        /* Get actual MTU if possible. */
-       err = mlx5_get_mtu(eth_dev, &priv->mtu);
+       err = mlx5_get_mtu(eth_dev, &eth_dev->data->mtu);
        if (err) {
                err = rte_errno;
                goto error;
        }
-       eth_dev->data->mtu = priv->mtu;
        DRV_LOG(DEBUG, "port %u MTU is %u", eth_dev->data->port_id,
-               priv->mtu);
+               eth_dev->data->mtu);
        /* Initialize burst functions to prevent crashes before link-up. */
        eth_dev->rx_pkt_burst = rte_eth_pkt_burst_dummy;
        eth_dev->tx_pkt_burst = rte_eth_pkt_burst_dummy;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 966e802f5f..551f3b9d17 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1983,7 +1983,6 @@ struct mlx5_priv {
        uint16_t vlan_filter[MLX5_MAX_VLAN_IDS]; /* VLAN filters table. */
        unsigned int vlan_filter_n; /* Number of configured VLAN filters. */
        /* Device properties. */
-       uint16_t mtu; /* Configured MTU. */
        uint16_t min_mtu; /* Minimum MTU allowed on the NIC. */
        uint16_t max_mtu; /* Maximum MTU allowed on the NIC. */
        unsigned int isolated:1; /* Whether isolated mode is enabled. */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index c93a7ac4f2..00a1d444ef 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -679,7 +679,6 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev, 
size_t *no_of_elements)
 int
 mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
 {
-       struct mlx5_priv *priv = dev->data->dev_private;
        uint16_t kern_mtu = 0;
        int ret;
 
@@ -688,7 +687,6 @@ mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
                return ret;
 
        if (kern_mtu == mtu) {
-               priv->mtu = mtu;
                DRV_LOG(DEBUG, "port %u adapter MTU was already set to %u",
                        dev->data->port_id, mtu);
                return 0;
@@ -702,7 +700,6 @@ mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
        if (ret)
                return ret;
        if (kern_mtu == mtu) {
-               priv->mtu = mtu;
                DRV_LOG(DEBUG, "port %u adapter MTU set to %u",
                        dev->data->port_id, mtu);
                return 0;
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 9210a92c5f..c4ba746d47 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -659,8 +659,6 @@ mlx5_rx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t 
idx, uint16_t *desc,
                        struct mlx5_rxq_ctrl **rxq_ctrl)
 {
        struct mlx5_priv *priv = dev->data->dev_private;
-       struct mlx5_rxq_priv *rxq;
-       bool empty;
 
        if (*desc > mlx5_dev_get_max_wq_size(priv->sh)) {
                DRV_LOG(ERR,
@@ -699,14 +697,6 @@ mlx5_rx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t 
idx, uint16_t *desc,
                if ((*rxq_ctrl)->obj != NULL)
                        /* Some port using shared Rx queue has been started. */
                        return 0;
-               /* Release all owner RxQ to reconfigure Shared RxQ. */
-               do {
-                       rxq = LIST_FIRST(&(*rxq_ctrl)->owners);
-                       LIST_REMOVE(rxq, owner_entry);
-                       empty = LIST_EMPTY(&(*rxq_ctrl)->owners);
-                       mlx5_rxq_release(ETH_DEV(rxq->priv), rxq->idx);
-               } while (!empty);
-               *rxq_ctrl = NULL;
        }
        return 0;
 }
@@ -780,10 +770,21 @@ mlx5_shared_rxq_match(struct mlx5_rxq_ctrl *rxq_ctrl, 
struct rte_eth_dev *dev,
                        dev->data->port_id, idx);
                return false;
        }
-       if (priv->mtu != rxq_ctrl->mtu) {
-               DRV_LOG(ERR, "port %u queue index %u failed to join shared 
group: mtu mismatch",
-                       dev->data->port_id, idx);
-               return false;
+       if (dev->data->mtu != rxq_ctrl->mtu) {
+               /*
+                * MTU mismatch is only a problem when the queue hasn't been 
started yet.
+                * If rxq_ctrl->obj is NULL, the queue hardware objects haven't 
been created,
+                * meaning we're in the initial configuration phase where MTU 
must match.
+                * If obj != NULL, the queue is already running with its 
hardware configured,
+                * and runtime MTU changes are safe as they only update 
software bookkeeping
+                * without recreating hardware resources.
+                */
+               if (rxq_ctrl->obj == NULL) {
+                       DRV_LOG(DEBUG, "port %u queue index %u: mtu mismatch 
with existing shared rxq_ctrl "
+                                       "(port mtu=%u rxq_ctrl mtu=%u), 
reconfiguration needed",
+                                       dev->data->port_id, idx, 
dev->data->mtu, rxq_ctrl->mtu);
+                       return false;
+               }
        }
        if (priv->dev_data->dev_conf.intr_conf.rxq !=
            spriv->dev_data->dev_conf.intr_conf.rxq) {
@@ -926,8 +927,57 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, 
uint16_t desc,
                if (rxq_ctrl != NULL &&
                    !mlx5_shared_rxq_match(rxq_ctrl, dev, idx, desc, socket,
                                           conf, mp)) {
-                       rte_errno = EINVAL;
-                       return -rte_errno;
+                       struct mlx5_rxq_priv *rxq_tmp;
+                       bool empty;
+
+                       /*
+                        * Configuration mismatch detected with existing shared 
RXQ.
+                        * We need to reconfigure, but only if it's safe to do 
so.
+                        *
+                        * First check: If hardware objects are allocated, the 
shared queue has
+                        * been started. Reconfiguration would require 
destroying and recreating
+                        * hardware resources, which cannot be done while the 
queue is active.
+                        * Return EBUSY to force caller to stop the queue first.
+                        */
+                       if (rxq_ctrl->obj != NULL) {
+                               DRV_LOG(ERR, "port %u queue index %u: cannot 
reconfigure shared RXQ while started",
+                                       dev->data->port_id, idx);
+                               rte_errno = EBUSY;
+                               return -rte_errno;
+                       }
+
+                       /*
+                        * Second check: Even if hardware objects aren't 
allocated yet,
+                        * verify that no owner port is actively using this 
queue.
+                        * refcnt == 1 means the queue exists but is idle (only 
setup reference).
+                        * refcnt > 1 means the queue is being used by flows or 
other components.
+                        * This prevents releasing a queue that other ports 
depend on.
+                        */
+                       LIST_FOREACH(rxq_tmp, &rxq_ctrl->owners, owner_entry) {
+                               if (rxq_tmp->refcnt > 1) {
+                                       DRV_LOG(ERR, "port %u queue index %u: 
cannot reconfigure shared RXQ "
+                                               "while other ports are running",
+                                               dev->data->port_id, idx);
+                                       rte_errno = EBUSY;
+                                       return -rte_errno;
+                               }
+                       }
+
+                       /*
+                        * Safe to reconfigure: hardware not started and no 
active users.
+                        * Release all owner ports from the existing shared 
rxq_ctrl.
+                        * This will decrement references and eventually free 
the old rxq_ctrl.
+                        * Setting rxq_ctrl to NULL triggers creation of a new 
one below with
+                        * the updated configuration.
+                        */
+                       do {
+                               rxq_tmp = LIST_FIRST(&rxq_ctrl->owners);
+                               LIST_REMOVE(rxq_tmp, owner_entry);
+                               empty = LIST_EMPTY(&rxq_ctrl->owners);
+                               mlx5_rxq_release(ETH_DEV(rxq_tmp->priv), 
rxq_tmp->idx);
+                       } while (!empty);
+
+                       rxq_ctrl = NULL;
                }
        } else {
                res = mlx5_rx_queue_pre_setup(dev, idx, &desc, &rxq_ctrl);
@@ -1813,7 +1863,8 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, 
uint16_t desc,
        LIST_INIT(&tmpl->owners);
        MLX5_ASSERT(n_seg && n_seg <= MLX5_MAX_RXQ_NSEG);
        /*
-        * Save the original MTU to check against for shared rx queues.
+        * Save the current MTU to check against for shared rx queues.
+        * Use dev->data->mtu which reflects the actual current MTU.
         */
        tmpl->mtu = dev->data->mtu;
        /*
diff --git a/drivers/net/mlx5/windows/mlx5_os.c 
b/drivers/net/mlx5/windows/mlx5_os.c
index 4eadc872a5..5dcabfbed2 100644
--- a/drivers/net/mlx5/windows/mlx5_os.c
+++ b/drivers/net/mlx5/windows/mlx5_os.c
@@ -396,7 +396,6 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
        priv->sh = sh;
        priv->dev_port = spawn->phys_port;
        priv->pci_dev = spawn->pci_dev;
-       priv->mtu = RTE_ETHER_MTU;
        priv->mp_id.port_id = port_id;
        strlcpy(priv->mp_id.name, MLX5_MP_NAME, RTE_MP_MAX_NAME_LEN);
        priv->representor = !!switch_info->representor;
@@ -504,14 +503,13 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
        }
 #endif
        /* Get actual MTU if possible. */
-       err = mlx5_get_mtu(eth_dev, &priv->mtu);
+       err = mlx5_get_mtu(eth_dev, &eth_dev->data->mtu);
        if (err) {
                err = rte_errno;
                goto error;
        }
-       eth_dev->data->mtu = priv->mtu;
        DRV_LOG(DEBUG, "port %u MTU is %u.", eth_dev->data->port_id,
-               priv->mtu);
+               eth_dev->data->mtu);
        /* Initialize burst functions to prevent crashes before link-up. */
        eth_dev->rx_pkt_burst = rte_eth_pkt_burst_dummy;
        eth_dev->tx_pkt_burst = rte_eth_pkt_burst_dummy;
-- 
2.43.0

Reply via email to