On 9/24/2022 3:45 AM, lon...@linuxonhyperv.com wrote:


From: Long Li <lon...@microsoft.com>

MANA is a PCI device. It uses IB verbs to access hardware through the
kernel RDMA layer. This patch introduces build environment and basic
device probe functions.

Signed-off-by: Long Li <lon...@microsoft.com>

<...>

+++ b/doc/guides/nics/mana.rst
@@ -0,0 +1,69 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+    Copyright 2022 Microsoft Corporation
+
+MANA poll mode driver library
+=============================
+
+The MANA poll mode driver library (**librte_net_mana**) implements support
+for Microsoft Azure Network Adapter VF in SR-IOV context.
+
+Features
+--------
+
+Features of the MANA Ethdev PMD are:
+
+Prerequisites
+-------------
+
+This driver relies on external libraries and kernel drivers for resources
+allocations and initialization. The following dependencies are not part of
+DPDK and must be installed separately:
+
+- **libibverbs** (provided by rdma-core package)
+

Would it make sense to provide a link to the rdma-core git repository?

<...>

+
+static const char * const mana_init_args[] = {
+       "mac",

It is better to define a macro for the devarg string, so that it can be reused in 'RTE_PMD_REGISTER_PARAM_STRING' (please see below).

#define ETH_MANA_MAC_ARG "mac"
static const char * const mana_init_args[] = {
        ETH_MANA_MAC_ARG,
        NULL,
};

<...>

+
+/*
+ * Goes through the IB device list to look for the IB port matching the
+ * mac_addr. If found, create a rte_eth_dev for it.
+ */
+static int
+mana_pci_probe_mac(struct rte_pci_device *pci_dev,
+                  struct rte_ether_addr *mac_addr)
+{
+       struct ibv_device **ibv_list;
+       int ibv_idx;
+       struct ibv_context *ctx;
+       struct ibv_device_attr_ex dev_attr;
+       int num_devices;
+       int ret = 0;
+       uint8_t port;
+       struct mana_priv *priv = NULL;
+       struct rte_eth_dev *eth_dev = NULL;
+       bool found_port;
+
+       ibv_list = ibv_get_device_list(&num_devices);
+       for (ibv_idx = 0; ibv_idx < num_devices; ibv_idx++) {
+               struct ibv_device *ibdev = ibv_list[ibv_idx];
+               struct rte_pci_addr pci_addr;
+
+               DRV_LOG(INFO, "Probe device name %s dev_name %s ibdev_path %s",
+                       ibdev->name, ibdev->dev_name, ibdev->ibdev_path);
+
+               if (mana_ibv_device_to_pci_addr(ibdev, &pci_addr))
+                       continue;
+
+               /* Ignore if this IB device is not this PCI device */
+               if (pci_dev->addr.domain != pci_addr.domain ||
+                   pci_dev->addr.bus != pci_addr.bus ||
+                   pci_dev->addr.devid != pci_addr.devid ||
+                   pci_dev->addr.function != pci_addr.function)
+                       continue;
+
+               ctx = ibv_open_device(ibdev);
+               if (!ctx) {
+                       DRV_LOG(ERR, "Failed to open IB device %s",
+                               ibdev->name);
+                       continue;
+               }
+
+               ret = ibv_query_device_ex(ctx, NULL, &dev_attr);
+               DRV_LOG(INFO, "dev_attr.orig_attr.phys_port_cnt %u",
+                       dev_attr.orig_attr.phys_port_cnt);
+               found_port = false;
+
+               for (port = 1; port <= dev_attr.orig_attr.phys_port_cnt;
+                    port++) {
+                       struct ibv_parent_domain_init_attr attr = {0};
+                       struct rte_ether_addr addr;
+                       char address[64];
+                       char name[RTE_ETH_NAME_MAX_LEN];
+
+                       ret = get_port_mac(ibdev, port, &addr);
+                       if (ret)
+                               continue;
+
+                       if (mac_addr && !rte_is_same_ether_addr(&addr, 
mac_addr))
+                               continue;
+
+                       rte_ether_format_addr(address, sizeof(address), &addr);
+                       DRV_LOG(INFO, "device located port %u address %s",
+                               port, address);
+                       found_port = true;
+
+                       priv = rte_zmalloc_socket(NULL, sizeof(*priv),
+                                                 RTE_CACHE_LINE_SIZE,
+                                                 SOCKET_ID_ANY);
+                       if (!priv) {
+                               ret = -ENOMEM;
+                               goto failed;
+                       }
+
+                       snprintf(name, sizeof(name), "%s_port%d",
+                                pci_dev->device.name, port);
+
+                       if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+                               int fd;
+
+                               eth_dev = rte_eth_dev_attach_secondary(name);
+                               if (!eth_dev) {
+                                       DRV_LOG(ERR, "Can't attach to dev %s",
+                                               name);
+                                       ret = -ENOMEM;
+                                       goto failed;
+                               }
+
+                               eth_dev->device = &pci_dev->device;
+                               eth_dev->dev_ops = &mana_dev_secondary_ops;
+                               ret = mana_proc_priv_init(eth_dev);
+                               if (ret)
+                                       goto failed;
+                               priv->process_priv = eth_dev->process_private;
+
+                               /* Get the IB FD from the primary process */
+                               fd = mana_mp_req_verbs_cmd_fd(eth_dev);
+                               if (fd < 0) {
+                                       DRV_LOG(ERR, "Failed to get FD %d", fd);
+                                       ret = -ENODEV;
+                                       goto failed;
+                               }
+
+                               ret = mana_map_doorbell_secondary(eth_dev, fd);
+                               if (ret) {
+                                       DRV_LOG(ERR, "Failed secondary map %d",
+                                               fd);

The indentation level (and length) of this function hints that some parts of it can be separated into their own functions; for example, probing a single 'ibv_device' can be its own function.

Can you refactor the function to increase readability? It is on the control path, so there is no restriction on adding function calls.

+                                       goto failed;
+                               }
+
+                               /* fd is no not used after mapping doorbell */
+                               close(fd);
+
+                               rte_spinlock_lock(&mana_shared_data->lock);
+                               mana_shared_data->secondary_cnt++;
+                               mana_local_data.secondary_cnt++;
+                               rte_spinlock_unlock(&mana_shared_data->lock);
+
+                               rte_eth_copy_pci_info(eth_dev, pci_dev);
+                               rte_eth_dev_probing_finish(eth_dev);
+
+                               /* Impossible to have more than one port
+                                * matching a MAC address
+                                */
+                               continue;
+                       }
+
+                       eth_dev = rte_eth_dev_allocate(name);
+                       if (!eth_dev) {
+                               ret = -ENOMEM;
+                               goto failed;
+                       }
+
+                       eth_dev->data->mac_addrs =
+                               rte_calloc("mana_mac", 1,
+                                          sizeof(struct rte_ether_addr), 0);
+                       if (!eth_dev->data->mac_addrs) {
+                               ret = -ENOMEM;
+                               goto failed;
+                       }
+
+                       rte_ether_addr_copy(&addr, eth_dev->data->mac_addrs);
+
+                       priv->ib_pd = ibv_alloc_pd(ctx);
+                       if (!priv->ib_pd) {
+                               DRV_LOG(ERR, "ibv_alloc_pd failed port %d", 
port);
+                               ret = -ENOMEM;
+                               goto failed;
+                       }
+
+                       /* Create a parent domain with the port number */
+                       attr.pd = priv->ib_pd;
+                       attr.comp_mask = IBV_PARENT_DOMAIN_INIT_ATTR_PD_CONTEXT;
+                       attr.pd_context = (void *)(uint64_t)port;
+                       priv->ib_parent_pd = ibv_alloc_parent_domain(ctx, 
&attr);
+                       if (!priv->ib_parent_pd) {
+                               DRV_LOG(ERR,
+                                       "ibv_alloc_parent_domain failed port 
%d",
+                                       port);
+                               ret = -ENOMEM;
+                               goto failed;
+                       }
+
+                       priv->ib_ctx = ctx;
+                       priv->port_id = eth_dev->data->port_id;
+                       priv->dev_port = port;
+                       eth_dev->data->dev_private = priv;
+                       priv->dev_data = eth_dev->data;
+
+                       priv->max_rx_queues = dev_attr.orig_attr.max_qp;
+                       priv->max_tx_queues = dev_attr.orig_attr.max_qp;
+
+                       priv->max_rx_desc =
+                               RTE_MIN(dev_attr.orig_attr.max_qp_wr,
+                                       dev_attr.orig_attr.max_cqe);
+                       priv->max_tx_desc =
+                               RTE_MIN(dev_attr.orig_attr.max_qp_wr,
+                                       dev_attr.orig_attr.max_cqe);
+
+                       priv->max_send_sge = dev_attr.orig_attr.max_sge;
+                       priv->max_recv_sge = dev_attr.orig_attr.max_sge;
+
+                       priv->max_mr = dev_attr.orig_attr.max_mr;
+                       priv->max_mr_size = dev_attr.orig_attr.max_mr_size;
+
+                       DRV_LOG(INFO, "dev %s max queues %d desc %d sge %d",
+                               name, priv->max_rx_queues, priv->max_rx_desc,
+                               priv->max_send_sge);
+
+                       rte_spinlock_lock(&mana_shared_data->lock);
+                       mana_shared_data->primary_cnt++;
+                       rte_spinlock_unlock(&mana_shared_data->lock);
+
+                       eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_RMV;
+

This assignment is already done by 'rte_eth_copy_pci_info()' when the 'RTE_PCI_DRV_INTR_RMV' driver flag is set, which this PMD does, so the assignment is redundant.

<...>

+
+RTE_PMD_REGISTER_PCI(net_mana, mana_pci_driver);
+RTE_PMD_REGISTER_PCI_TABLE(net_mana, mana_pci_id_map);
+RTE_PMD_REGISTER_KMOD_DEP(net_mana, "* ib_uverbs & mana_ib");
+RTE_LOG_REGISTER_SUFFIX(mana_logtype_init, init, NOTICE);
+RTE_LOG_REGISTER_SUFFIX(mana_logtype_driver, driver, NOTICE);

Can you please add the 'RTE_PMD_REGISTER_PARAM_STRING' macro for the 'mac' devarg?

diff --git a/drivers/net/mana/mana.h b/drivers/net/mana/mana.h
new file mode 100644
index 0000000000..a2021ceb4a
--- /dev/null
+++ b/drivers/net/mana/mana.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2022 Microsoft Corporation
+ */
+
+#ifndef __MANA_H__
+#define __MANA_H__
+
+enum {
+       PCI_VENDOR_ID_MICROSOFT = 0x1414,
+};
+
+enum {
+       PCI_DEVICE_ID_MICROSOFT_MANA = 0x00ba,
+};

There is common guidance to prefer enums over #define, BUT

I tend to use enums for related values, or when the underlying numerical value doesn't matter.

For PCI IDs I would use #define, although both work the same. What do you think about changing them to #define?

Reply via email to