On 7/9/2022 12:49 AM, lon...@linuxonhyperv.com wrote:
CAUTION: This message has originated from an External Source. Please use proper 
judgment and caution when opening attachments, clicking links, or responding to 
this email.


From: Long Li <lon...@microsoft.com>

MANA is a PCI device. It uses IB verbs to access hardware through the
kernel RDMA layer. This patch introduces build environment and basic
device probe functions.

Signed-off-by: Long Li <lon...@microsoft.com>
---
Change log:
v2:
Fix typos.
Make the driver build only on x86-64 and Linux.
Remove unused header files.
Change port definition to uint16_t or uint8_t (for IB).
Use getline() in place of fgets() to read and truncate a line.
v3:
Add meson build check for required functions from RDMA direct verb header file
v4:
Remove extra "\n" in logging code.
Use "r" in place of "rb" in fopen() to read text files.


<...>

--- /dev/null
+++ b/doc/guides/nics/mana.rst
@@ -0,0 +1,66 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+    Copyright 2022 Microsoft Corporation
+
+MANA poll mode driver library
+=============================
+
+The MANA poll mode driver library (**librte_net_mana**) implements support
+for Microsoft Azure Network Adapter VF in SR-IOV context.
+

Can you please provide a link to an official product description, as a reference point for anybody interested in more product details?


<..>

+
+Netvsc PMD arguments
+--------------------

'Netvsc'? Do you mean 'MANA'?
+
+The user can specify below argument in devargs.
+
+#.  ``mac``:
+
+    Specify the MAC address for this device. If it is set, the driver
+    probes and loads the NIC with a matching mac address. If it is not
+    set, the driver probes on all the NICs on the PCI device. The default
+    value is not set, meaning all the NICs will be probed and loaded.


The code accepts up to 8 MAC values; should this be documented?

Also, why is this devarg needed?

diff --git a/drivers/net/mana/mana.c b/drivers/net/mana/mana.c
new file mode 100644
index 0000000000..cb59eb6882
--- /dev/null
+++ b/drivers/net/mana/mana.c
@@ -0,0 +1,704 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2022 Microsoft Corporation
+ */
+
+#include <unistd.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#include <ethdev_driver.h>
+#include <ethdev_pci.h>
+#include <rte_kvargs.h>
+#include <rte_eal_paging.h>
+
+#include <infiniband/verbs.h>
+#include <infiniband/manadv.h>
+
+#include <assert.h>
+
+#include "mana.h"
+
+/* Shared memory between primary/secondary processes, per driver */
+struct mana_shared_data *mana_shared_data;
+const struct rte_memzone *mana_shared_mz;

If these global variables are not used by other compilation units, please try to make them static as much as possible.

+static const char *MZ_MANA_SHARED_DATA = "mana_shared_data";
+
+struct mana_shared_data mana_local_data;
+

Can you add a comment for these global variables?

+/* Spinlock for mana_shared_data */
+static rte_spinlock_t mana_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
+
+/* Allocate a buffer on the stack and fill it with a printf format string. */
+#define MKSTR(name, ...) \
+       int mkstr_size_##name = snprintf(NULL, 0, "" __VA_ARGS__); \
+       char name[mkstr_size_##name + 1]; \
+       \
+       memset(name, 0, mkstr_size_##name + 1); \
+       snprintf(name, sizeof(name), "" __VA_ARGS__)
+
+int mana_logtype_driver;
+int mana_logtype_init;
+
+const struct eth_dev_ops mana_dev_ops = {
+};
+
+const struct eth_dev_ops mana_dev_sec_ops = {
+};

It may be better to expand 'sec' to 'secondary' so that it is not confused with 'security' etc.

+
+uint16_t
+mana_rx_burst_removed(void *dpdk_rxq __rte_unused,
+                     struct rte_mbuf **pkts __rte_unused,
+                     uint16_t pkts_n __rte_unused)
+{
+       rte_mb();
+       return 0;
+}
+
+uint16_t
+mana_tx_burst_removed(void *dpdk_rxq __rte_unused,
+                     struct rte_mbuf **pkts __rte_unused,
+                     uint16_t pkts_n __rte_unused)
+{
+       rte_mb();
+       return 0;
+}
+
+static const char *mana_init_args[] = {
+       "mac",
+       NULL,
+};
+
+/* Support of parsing up to 8 mac address from EAL command line */
+#define MAX_NUM_ADDRESS 8
+struct mana_conf {
+       struct rte_ether_addr mac_array[MAX_NUM_ADDRESS];
+       unsigned int index;
+};
+
+static int mana_arg_parse_callback(const char *key, const char *val,
+                                  void *private)

Since this is a new driver, it is better to follow the coding convention:
https://doc.dpdk.org/guides/contributing/coding_style.html

Please put the return type on its own line:

static int
mana_arg_parse_callback(const char *key, const char *val, void *private)

+{
+       struct mana_conf *conf = (struct mana_conf *)private;
+       int ret;
+
+       DRV_LOG(INFO, "key=%s value=%s index=%d", key, val, conf->index);
+
+       if (conf->index >= MAX_NUM_ADDRESS) {
+               DRV_LOG(ERR, "Exceeding max MAC address");
+               return 1;
+       }
+
+       ret = rte_ether_unformat_addr(val, &conf->mac_array[conf->index]);
+       if (ret) {
+               DRV_LOG(ERR, "Invalid MAC address %s", val);
+               return ret;
+       }
+
+       conf->index++;
+
+       return 0;
+}
+

<...>

+static int get_port_mac(struct ibv_device *device, unsigned int port,
+                       struct rte_ether_addr *addr)
+{
+       FILE *file;
+       int ret = 0;
+       DIR *dir;
+       struct dirent *dent;
+       unsigned int dev_port;
+       char mac[20];
+
+       MKSTR(path, "%s/device/net", device->ibdev_path);
+
+       dir = opendir(path);
+       if (!dir)
+               return -ENOENT;
+
+       while ((dent = readdir(dir))) {
+               char *name = dent->d_name;
+
+               MKSTR(filepath, "%s/%s/dev_port", path, name);
+
+               /* Ignore . and .. */
+               if ((name[0] == '.') &&
+                   ((name[1] == '\0') ||
+                    ((name[1] == '.') && (name[2] == '\0'))))
+                       continue;
+
+               file = fopen(filepath, "r");
+               if (!file)
+                       continue;
+
+               ret = fscanf(file, "%u", &dev_port);
+               fclose(file);
+
+               if (ret != 1)
+                       continue;
+
+               /* Ethernet ports start at 0, IB port start at 1 */
+               if (dev_port == port - 1) {
+                       MKSTR(filepath, "%s/%s/address", path, name);


The 'MKSTR' macro defines two variables derived from its first argument, and 'filepath' is already used above. Yes, there is a new scope here, but it is better not to redefine (shadow) those variables; can you pick a different name here?

<...>

+
+static int mana_pci_probe_mac(struct rte_pci_driver *pci_drv __rte_unused,

This is a static function; if you don't use 'pci_drv', why not drop it from the argument list?

+                             struct rte_pci_device *pci_dev,
+                             struct rte_ether_addr *mac_addr)
+{
+       struct ibv_device **ibv_list;
+       int ibv_idx;
+       struct ibv_context *ctx;
+       struct ibv_device_attr_ex dev_attr;
+       int num_devices;
+       int ret = 0;
+       uint8_t port;
+       struct mana_priv *priv = NULL;
+       struct rte_eth_dev *eth_dev = NULL;
+       bool found_port;
+
+       ibv_list = ibv_get_device_list(&num_devices);
+       for (ibv_idx = 0; ibv_idx < num_devices; ibv_idx++) {
+               struct ibv_device *ibdev = ibv_list[ibv_idx];
+               struct rte_pci_addr pci_addr;
+
+               DRV_LOG(INFO, "Probe device name %s dev_name %s ibdev_path %s",
+                       ibdev->name, ibdev->dev_name, ibdev->ibdev_path);
+
+               if (mana_ibv_device_to_pci_addr(ibdev, &pci_addr))
+                       continue;
+
+               /* Ignore if this IB device is not this PCI device */
+               if (pci_dev->addr.domain != pci_addr.domain ||
+                   pci_dev->addr.bus != pci_addr.bus ||
+                   pci_dev->addr.devid != pci_addr.devid ||
+                   pci_dev->addr.function != pci_addr.function)
+                       continue;
+

As far as I understand, the intention of this loop is to find the 'ibdev' matching this PCI device, and the code goes through the whole "ibv device list" to do so. I wonder if there is an easier way to do this, such as a sysfs entry that provides this information?
And how do mlx4/5 do this?

+               ctx = ibv_open_device(ibdev);
+               if (!ctx) {
+                       DRV_LOG(ERR, "Failed to open IB device %s",
+                               ibdev->name);
+                       continue;
+               }
+
+               ret = ibv_query_device_ex(ctx, NULL, &dev_attr);
+               DRV_LOG(INFO, "dev_attr.orig_attr.phys_port_cnt %u",
+                       dev_attr.orig_attr.phys_port_cnt);
+               found_port = false;
+
+               for (port = 1; port <= dev_attr.orig_attr.phys_port_cnt;
+                    port++) {
+                       struct ibv_parent_domain_init_attr attr = {};

"= { 0 };" for portability.

<...>

+static int mana_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
+                         struct rte_pci_device *pci_dev)
+{
+       struct rte_devargs *args = pci_dev->device.devargs;
+       struct mana_conf conf = {};

AFAIK, the empty initializer "= {}" is not part of the C standard yet; why not initialize with "= {0}"?

+       unsigned int i;
+       int ret;
+
+       if (args && args->args) {

You may prefer 'args->drv_str', which is the newer name for 'args->args'.

<...>

+static const struct rte_pci_id mana_pci_id_map[] = {
+       {
+               RTE_PCI_DEVICE(PCI_VENDOR_ID_MICROSOFT,
+                              PCI_DEVICE_ID_MICROSOFT_MANA)
+       },

PCI ID list should be terminated with ".vendor_id = 0", otherwise PCI bus scan loop may behave unexpectedly.

+};
+
+static struct rte_pci_driver mana_pci_driver = {
+       .driver = {
+               .name = "mana_pci",

Driver names are mostly of the form 'net_<driver_name>'; is there a reason to diverge from that? Also, if you use the 'RTE_PMD_REGISTER_PCI' macro, the name will be standardised anyway.

+       },
+       .id_table = mana_pci_id_map,
+       .probe = mana_pci_probe,
+       .remove = mana_pci_remove,
+       .drv_flags = RTE_PCI_DRV_INTR_RMV,
+};
+
+RTE_INIT(rte_mana_pmd_init)
+{
+       rte_pci_register(&mana_pci_driver);
+}
+

Why not use the 'RTE_PMD_REGISTER_PCI()' macro instead?

+RTE_PMD_EXPORT_NAME(net_mana, __COUNTER__);
+RTE_PMD_REGISTER_PCI_TABLE(net_mana, mana_pci_id_map);
+RTE_PMD_REGISTER_KMOD_DEP(net_mana, "* ib_uverbs & mana_ib");
+RTE_LOG_REGISTER_SUFFIX(mana_logtype_init, init, NOTICE);
+RTE_LOG_REGISTER_SUFFIX(mana_logtype_driver, driver, NOTICE);
diff --git a/drivers/net/mana/mana.h b/drivers/net/mana/mana.h
new file mode 100644
index 0000000000..e30c030b4e
--- /dev/null
+++ b/drivers/net/mana/mana.h
@@ -0,0 +1,210 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2022 Microsoft Corporation
+ */
+
+#ifndef __MANA_H__
+#define __MANA_H__
+
+enum {
+       PCI_VENDOR_ID_MICROSOFT = 0x1414,
+};
+
+enum {
+       PCI_DEVICE_ID_MICROSOFT_MANA = 0x00ba,
+};
+
+/* Shared data between primary/secondary processes */
+struct mana_shared_data {
+       rte_spinlock_t lock;
+       int init_done;
+       unsigned int primary_cnt;
+       unsigned int secondary_cnt;
+};
+
+#define MIN_RX_BUF_SIZE        1024
+#define MAX_FRAME_SIZE RTE_ETHER_MAX_LEN
+#define BNIC_MAX_MAC_ADDR 1
+

What does the 'BNIC_' prefix stand for? If it is related to the PMD, what do you think about using 'MANA_' as the prefix instead?
The same applies to multiple macros below.

<...>

+
+#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>")
+
+const uint32_t *mana_supported_ptypes(struct rte_eth_dev *dev);
+

This function is not defined in this patch, so the declaration can be dropped.

<...>

diff --git a/drivers/net/mana/version.map b/drivers/net/mana/version.map
new file mode 100644
index 0000000000..c2e0723b4c
--- /dev/null
+++ b/drivers/net/mana/version.map
@@ -0,0 +1,3 @@
+DPDK_22 {

It is 'DPDK_23' now.

Reply via email to