BlueField DPUs with Socket Direct (SD) can be connected to 2 different
CPUs on the host system.
Each host CPU sees 2 PFs.
Each PF is connected to one of the physical ports.

On BlueField DPU ARM, Linux netdevs map to PFs/ports as follows:

- p0 and p1 to physical ports 0 and 1 respectively,
- pf0hpf and pf2hpf to CPU0 pf0 and CPU1 pf0 respectively,
- pf1hpf and pf3hpf to CPU0 pf1 and CPU1 pf1 respectively.

There are several possible ways to use such a setup:

1. One E-Switch (embedded switch) per CPU PF to
   physical port connection.
2. Shared E-Switch for related CPU PFs:
    - For example, both pf0hpf and pf2hpf are in the same E-Switch.
3. Multiport E-Switch (MPESW).

Existing probing logic in mlx5 PMD did not support case (2).
In this case there is one physical port (uplink in mlx5 naming)
and 2 host PFs.
On such a setup mlx5 generated port names with the following syntax:

    03:00.0_representor_vfX

This happened because the setup was recognized as neither bond
nor MPESW.
Since BlueField with Socket Direct has 2 host PFs,
such probing logic caused DPDK port name collisions
on an attempt to probe both host PFs at the same time.

This patch addresses that by changing probing and naming logic
to be more generic. This is achieved through:

- Adding logic for calculating the number of uplinks and
  the number of host PFs available on the system.
- Changing port name generation logic to be based on these numbers
  instead of a specific setup type.
- Changing representor matching logic during probing
  to respect all parameters passed in devargs.
  Specifically, controller index, PF index and VF indexes are used.

Fixes: 11c73de9ef63 ("net/mlx5: probe multi-port E-Switch device")
Cc: [email protected]

Signed-off-by: Dariusz Sosnowski <[email protected]>
Acked-by: Bing Zhao <[email protected]>
---
 drivers/net/mlx5/linux/mlx5_os.c | 342 +++++++++++++++++++++----------
 drivers/net/mlx5/mlx5.h          |   2 +
 2 files changed, 241 insertions(+), 103 deletions(-)

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 405aa9799c..324d65cf32 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1047,6 +1047,171 @@ mlx5_queue_counter_id_prepare(struct rte_eth_dev *dev)
                        "available.", dev->data->port_id);
 }
 
+static inline bool
+mlx5_ignore_pf_representor(const struct rte_eth_devargs *eth_da)
+{
+       return (eth_da->flags & RTE_ETH_DEVARG_REPRESENTOR_IGNORE_PF) != 0;
+}
+
+static bool
+is_standard_eswitch(const struct mlx5_dev_spawn_data *spawn)
+{
+       bool is_bond = spawn->pf_bond >= 0;
+
+       return !is_bond && spawn->nb_uplinks <= 1 && spawn->nb_hpfs <= 1;
+}
+
+static bool
+is_hpf(const struct mlx5_dev_spawn_data *spawn)
+{
+       return spawn->info.port_name == -1 &&
+              spawn->info.name_type == MLX5_PHYS_PORT_NAME_TYPE_PFHPF;
+}
+
+static int
+build_port_name(struct rte_device *dpdk_dev,
+               struct mlx5_dev_spawn_data *spawn,
+               char *name,
+               size_t name_sz)
+{
+       bool is_bond = spawn->pf_bond >= 0;
+       int written = 0;
+       int ret;
+
+       ret = snprintf(name, name_sz, "%s", dpdk_dev->name);
+       if (ret < 0)
+               return ret;
+       written += ret;
+       if (written >= (int)name_sz)
+               return written;
+
+       /*
+        * Whenever bond device is detected, include IB device name.
+        * This is kept to keep port naming backward compatible.
+        */
+       if (is_bond) {
+               ret = snprintf(name + written, name_sz - written, "_%s", 
spawn->phys_dev_name);
+               if (ret < 0)
+                       return ret;
+               written += ret;
+               if (written >= (int)name_sz)
+                       return written;
+       }
+
+       if (spawn->info.name_type == MLX5_PHYS_PORT_NAME_TYPE_UPLINK) {
+               /* Add port to name if and only if there is more than one 
uplink. */
+               if (spawn->nb_uplinks <= 1)
+                       goto end;
+
+               ret = snprintf(name + written, name_sz - written, "_p%u", 
spawn->info.port_name);
+               if (ret < 0)
+                       return ret;
+               written += ret;
+               if (written >= (int)name_sz)
+                       return written;
+       } else if (spawn->info.representor) {
+               /*
+                * If port is a representor, then switchdev has been enabled.
+                * In that case add controller, PF and VF/SF indexes to port 
name
+                * if at least one of these conditions are met:
+                * 1. Device is a bond (VF-LAG).
+                * 2. There are multiple uplinks (MPESW).
+                * 3. There are multiple host PFs (BlueField socket direct).
+                *
+                * If none of these conditions apply, then it is assumed that
+                * this device manages a single non-shared E-Switch with single 
controller,
+                * where there is only one uplink/PF and one host PF (on 
BlueField).
+                */
+               if (!is_standard_eswitch(spawn))
+                       ret = snprintf(name + written, name_sz - written,
+                                      "_representor_c%dpf%d%s%u",
+                                      spawn->info.ctrl_num,
+                                      spawn->info.pf_num,
+                                      spawn->info.name_type ==
+                                      MLX5_PHYS_PORT_NAME_TYPE_PFSF ? "sf" : 
"vf",
+                                      spawn->info.port_name);
+               else
+                       ret = snprintf(name + written, name_sz - written, 
"_representor_%s%u",
+                                      spawn->info.name_type ==
+                                      MLX5_PHYS_PORT_NAME_TYPE_PFSF ? "sf" : 
"vf",
+                                      spawn->info.port_name);
+               if (ret < 0)
+                       return ret;
+               written += ret;
+               if (written >= (int)name_sz)
+                       return written;
+       }
+
+end:
+       return written;
+}
+
+static bool
+representor_match_uplink(const struct mlx5_dev_spawn_data *spawn,
+                        uint16_t port_name,
+                        const struct rte_eth_devargs *eth_da,
+                        uint16_t eth_da_pf_num)
+{
+       if (spawn->info.name_type != MLX5_PHYS_PORT_NAME_TYPE_UPLINK)
+               return false;
+       /* One of the uplinks will be a transfer proxy. Must be probed always. 
*/
+       if (spawn->info.master)
+               return true;
+       if (mlx5_ignore_pf_representor(eth_da))
+               return false;
+
+       return port_name == eth_da_pf_num;
+}
+
+static bool
+representor_match_port(const struct mlx5_dev_spawn_data *spawn,
+                      const struct rte_eth_devargs *eth_da)
+{
+       for (uint16_t p = 0; p < eth_da->nb_ports; ++p) {
+               uint16_t pf_num = eth_da->ports[p];
+
+               /* PF representor in devargs is interpreted as probing uplink 
port. */
+               if (eth_da->type == RTE_ETH_REPRESENTOR_PF) {
+                       if (representor_match_uplink(spawn, 
spawn->info.port_name, eth_da, pf_num))
+                               return true;
+
+                       continue;
+               }
+
+               /* Allow probing related uplink when VF/SF representor is 
requested. */
+               if ((eth_da->type == RTE_ETH_REPRESENTOR_VF ||
+                    eth_da->type == RTE_ETH_REPRESENTOR_SF) &&
+                   representor_match_uplink(spawn, spawn->info.pf_num, eth_da, 
pf_num))
+                       return true;
+
+               for (uint16_t f = 0; f < eth_da->nb_representor_ports; ++f) {
+                       uint16_t port_num = eth_da->representor_ports[f];
+                       bool pf_num_match;
+                       bool rep_num_match;
+
+                       /*
+                        * In standard E-Switch case, allow probing VFs even if 
wrong PF index
+                        * was provided.
+                        */
+                       if (is_standard_eswitch(spawn))
+                               pf_num_match = true;
+                       else
+                               pf_num_match = spawn->info.pf_num == pf_num;
+
+                       /* Host PF is indicated through VF/SF representor index 
== -1. */
+                       if (is_hpf(spawn))
+                               rep_num_match = port_num == UINT16_MAX;
+                       else
+                               rep_num_match = port_num == 
spawn->info.port_name;
+
+                       if (pf_num_match && rep_num_match)
+                               return true;
+               }
+       }
+
+       return false;
+}
+
 /**
  * Check if representor spawn info match devargs.
  *
@@ -1063,50 +1228,29 @@ mlx5_representor_match(struct mlx5_dev_spawn_data 
*spawn,
                       struct rte_eth_devargs *eth_da)
 {
        struct mlx5_switch_info *switch_info = &spawn->info;
-       unsigned int p, f;
-       uint16_t id;
-       uint16_t repr_id = mlx5_representor_id_encode(switch_info,
-                                                     eth_da->type);
+       unsigned int c;
+       bool ignore_ctrl_num = eth_da->nb_mh_controllers == 0 ||
+                              switch_info->name_type == 
MLX5_PHYS_PORT_NAME_TYPE_UPLINK;
 
-       /*
-        * Assuming Multiport E-Switch device was detected,
-        * if spawned port is an uplink, check if the port
-        * was requested through representor devarg.
-        */
-       if (mlx5_is_probed_port_on_mpesw_device(spawn) &&
-           switch_info->name_type == MLX5_PHYS_PORT_NAME_TYPE_UPLINK) {
-               for (p = 0; p < eth_da->nb_ports; ++p)
-                       if (switch_info->port_name == eth_da->ports[p])
-                               return true;
-               rte_errno = EBUSY;
-               return false;
-       }
        switch (eth_da->type) {
        case RTE_ETH_REPRESENTOR_PF:
-               /*
-                * PF representors provided in devargs translate to uplink 
ports, but
-                * if and only if the device is a part of MPESW device.
-                */
-               if (!mlx5_is_probed_port_on_mpesw_device(spawn)) {
+               if (switch_info->name_type != MLX5_PHYS_PORT_NAME_TYPE_UPLINK) {
                        rte_errno = EBUSY;
                        return false;
                }
                break;
        case RTE_ETH_REPRESENTOR_SF:
-               if (!(spawn->info.port_name == -1 &&
-                     switch_info->name_type ==
-                               MLX5_PHYS_PORT_NAME_TYPE_PFHPF) &&
-                   switch_info->name_type != MLX5_PHYS_PORT_NAME_TYPE_PFSF) {
+               if (!is_hpf(spawn) &&
+                   switch_info->name_type != MLX5_PHYS_PORT_NAME_TYPE_PFSF &&
+                   switch_info->name_type != MLX5_PHYS_PORT_NAME_TYPE_UPLINK) {
                        rte_errno = EBUSY;
                        return false;
                }
                break;
        case RTE_ETH_REPRESENTOR_VF:
-               /* Allows HPF representor index -1 as exception. */
-               if (!(spawn->info.port_name == -1 &&
-                     switch_info->name_type ==
-                               MLX5_PHYS_PORT_NAME_TYPE_PFHPF) &&
-                   switch_info->name_type != MLX5_PHYS_PORT_NAME_TYPE_PFVF) {
+               if (!is_hpf(spawn) &&
+                   switch_info->name_type != MLX5_PHYS_PORT_NAME_TYPE_PFVF &&
+                   switch_info->name_type != MLX5_PHYS_PORT_NAME_TYPE_UPLINK) {
                        rte_errno = EBUSY;
                        return false;
                }
@@ -1119,21 +1263,17 @@ mlx5_representor_match(struct mlx5_dev_spawn_data 
*spawn,
                DRV_LOG(ERR, "unsupported representor type");
                return false;
        }
-       /* Check representor ID: */
-       for (p = 0; p < eth_da->nb_ports; ++p) {
-               if (!mlx5_is_probed_port_on_mpesw_device(spawn) && 
spawn->pf_bond < 0) {
-                       /* For non-LAG mode, allow and ignore pf. */
-                       switch_info->pf_num = eth_da->ports[p];
-                       repr_id = mlx5_representor_id_encode(switch_info,
-                                                            eth_da->type);
-               }
-               for (f = 0; f < eth_da->nb_representor_ports; ++f) {
-                       id = MLX5_REPRESENTOR_ID
-                               (eth_da->ports[p], eth_da->type,
-                                eth_da->representor_ports[f]);
-                       if (repr_id == id)
+       if (!ignore_ctrl_num) {
+               for (c = 0; c < eth_da->nb_mh_controllers; ++c) {
+                       uint16_t ctrl_num = eth_da->mh_controllers[c];
+
+                       if (spawn->info.ctrl_num == ctrl_num &&
+                           representor_match_port(spawn, eth_da))
                                return true;
                }
+       } else {
+               if (representor_match_port(spawn, eth_da))
+                       return true;
        }
        rte_errno = EBUSY;
        return false;
@@ -1185,44 +1325,12 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
            !mlx5_representor_match(spawn, eth_da))
                return NULL;
        /* Build device name. */
-       if (spawn->pf_bond >= 0) {
-               /* Bonding device. */
-               if (!switch_info->representor) {
-                       err = snprintf(name, sizeof(name), "%s_%s",
-                                      dpdk_dev->name, spawn->phys_dev_name);
-               } else {
-                       err = snprintf(name, sizeof(name), 
"%s_%s_representor_c%dpf%d%s%u",
-                               dpdk_dev->name, spawn->phys_dev_name,
-                               switch_info->ctrl_num,
-                               switch_info->pf_num,
-                               switch_info->name_type ==
-                               MLX5_PHYS_PORT_NAME_TYPE_PFSF ? "sf" : "vf",
-                               switch_info->port_name);
-               }
-       } else if (mlx5_is_probed_port_on_mpesw_device(spawn)) {
-               /* MPESW device. */
-               if (switch_info->name_type == MLX5_PHYS_PORT_NAME_TYPE_UPLINK) {
-                       err = snprintf(name, sizeof(name), "%s_p%d",
-                                      dpdk_dev->name, spawn->mpesw_port);
-               } else {
-                       err = snprintf(name, sizeof(name), 
"%s_representor_c%dpf%d%s%u",
-                               dpdk_dev->name,
-                               switch_info->ctrl_num,
-                               switch_info->pf_num,
-                               switch_info->name_type ==
-                               MLX5_PHYS_PORT_NAME_TYPE_PFSF ? "sf" : "vf",
-                               switch_info->port_name);
-               }
-       } else {
-               /* Single device. */
-               if (!switch_info->representor)
-                       strlcpy(name, dpdk_dev->name, sizeof(name));
-               else
-                       err = snprintf(name, sizeof(name), 
"%s_representor_%s%u",
-                                dpdk_dev->name,
-                                switch_info->name_type ==
-                                MLX5_PHYS_PORT_NAME_TYPE_PFSF ? "sf" : "vf",
-                                switch_info->port_name);
+       err = build_port_name(dpdk_dev, spawn, name, sizeof(name));
+       if (err < 0) {
+               DRV_LOG(ERR, "Failed to build port name for IB device %s/%u",
+                       spawn->phys_dev_name, spawn->phys_port);
+               rte_errno = EINVAL;
+               return NULL;
        }
        if (err >= (int)sizeof(name))
                DRV_LOG(WARNING, "device name overflow %s", name);
@@ -2297,10 +2405,45 @@ mlx5_device_mpesw_pci_match(struct ibv_device *ibv,
        return -1;
 }
 
-static inline bool
-mlx5_ignore_pf_representor(const struct rte_eth_devargs *eth_da)
+static void
+calc_nb_uplinks_hpfs(struct ibv_device **ibv_match,
+                    unsigned int nd,
+                    struct mlx5_dev_spawn_data *list,
+                    unsigned int ns)
 {
-       return (eth_da->flags & RTE_ETH_DEVARG_REPRESENTOR_IGNORE_PF) != 0;
+       for (unsigned int i = 0; i != nd; i++) {
+               uint32_t nb_uplinks = 0;
+               uint32_t nb_hpfs = 0;
+               uint32_t j;
+
+               for (unsigned int j = 0; j != ns; j++) {
+                       if (strcmp(ibv_match[i]->name, list[j].phys_dev_name) 
!= 0)
+                               continue;
+
+                       if (list[j].info.name_type == 
MLX5_PHYS_PORT_NAME_TYPE_UPLINK)
+                               nb_uplinks++;
+                       else if (list[j].info.name_type == 
MLX5_PHYS_PORT_NAME_TYPE_PFHPF)
+                               nb_hpfs++;
+               }
+
+               if (nb_uplinks > 0 || nb_hpfs > 0) {
+                       for (j = 0; j != ns; j++) {
+                               if (strcmp(ibv_match[i]->name, 
list[j].phys_dev_name) != 0)
+                                       continue;
+
+                               list[j].nb_uplinks = nb_uplinks;
+                               list[j].nb_hpfs = nb_hpfs;
+                       }
+
+                       DRV_LOG(DEBUG, "IB device %s has %u uplinks, %u host 
PFs",
+                               ibv_match[i]->name,
+                               nb_uplinks,
+                               nb_hpfs);
+               } else {
+                       DRV_LOG(DEBUG, "IB device %s unable to recognize 
uplinks/host PFs",
+                               ibv_match[i]->name);
+               }
+       }
 }
 
 /**
@@ -2611,8 +2754,6 @@ mlx5_os_pci_probe_pf(struct mlx5_common_device *cdev,
                                        if (list[ns].info.port_name == mpesw) {
                                                list[ns].info.master = 1;
                                                list[ns].info.representor = 0;
-                                       } else if 
(mlx5_ignore_pf_representor(&eth_da)) {
-                                               continue;
                                        } else {
                                                list[ns].info.master = 0;
                                                list[ns].info.representor = 1;
@@ -2629,17 +2770,14 @@ mlx5_os_pci_probe_pf(struct mlx5_common_device *cdev,
                                case MLX5_PHYS_PORT_NAME_TYPE_PFHPF:
                                case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
                                case MLX5_PHYS_PORT_NAME_TYPE_PFSF:
-                                       /* Only spawn representors related to 
the probed PF. */
-                                       if (list[ns].info.pf_num == owner_id) {
-                                               /*
-                                                * Ports of this type have PF 
index encoded in name,
-                                                * which translate to the 
related uplink port index.
-                                                */
-                                               list[ns].mpesw_port = 
list[ns].info.pf_num;
-                                               /* MPESW owner is also saved 
but not used now. */
-                                               list[ns].info.mpesw_owner = 
mpesw;
-                                               ns++;
-                                       }
+                                       /*
+                                        * Ports of this type have PF index 
encoded in name,
+                                        * which translate to the related 
uplink port index.
+                                        */
+                                       list[ns].mpesw_port = 
list[ns].info.pf_num;
+                                       /* MPESW owner is also saved but not 
used now. */
+                                       list[ns].info.mpesw_owner = mpesw;
+                                       ns++;
                                        break;
                                default:
                                        break;
@@ -2773,6 +2911,8 @@ mlx5_os_pci_probe_pf(struct mlx5_common_device *cdev,
                }
        }
        MLX5_ASSERT(ns);
+       /* Calculate number of uplinks and host PFs for each matched IB device. 
*/
+       calc_nb_uplinks_hpfs(ibv_match, nd, list, ns);
        /*
         * Sort list to probe devices in natural order for users convenience
         * (i.e. master first, then representors from lowest to highest ID).
@@ -2780,16 +2920,12 @@ mlx5_os_pci_probe_pf(struct mlx5_common_device *cdev,
        qsort(list, ns, sizeof(*list), mlx5_dev_spawn_data_cmp);
        if (eth_da.type != RTE_ETH_REPRESENTOR_NONE) {
                /* Set devargs default values. */
-               if (eth_da.nb_mh_controllers == 0) {
-                       eth_da.nb_mh_controllers = 1;
-                       eth_da.mh_controllers[0] = 0;
-               }
                if (eth_da.nb_ports == 0 && ns > 0) {
                        if (list[0].pf_bond >= 0 && list[0].info.representor)
                                DRV_LOG(WARNING, "Representor on Bonding device 
should use pf#vf# syntax: %s",
                                        pci_dev->device.devargs->args);
                        eth_da.nb_ports = 1;
-                       eth_da.ports[0] = list[0].info.pf_num;
+                       eth_da.ports[0] = list[0].info.port_name;
                }
                if (eth_da.nb_representor_ports == 0) {
                        eth_da.nb_representor_ports = 1;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index c54266ec26..f69db11735 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -214,6 +214,8 @@ struct mlx5_dev_cap {
 struct mlx5_dev_spawn_data {
        uint32_t ifindex; /**< Network interface index. */
        uint32_t max_port; /**< Device maximal port index. */
+       uint32_t nb_uplinks; /**< Number of uplinks associated with IB device. 
*/
+       uint32_t nb_hpfs; /**< Number of host PFs associated with IB device. */
        uint32_t phys_port; /**< Device physical port index. */
        int pf_bond; /**< bonding device PF index. < 0 - no bonding */
        int mpesw_port; /**< MPESW uplink index. Valid if mpesw_owner_port >= 
0. */
-- 
2.47.3

Reply via email to