On 03/21/2018 02:21 PM, Xiao Wang wrote:
ifcvf driver uses vdev as a control domain to manage ifc VFs that belong
to it. It registers vDPA device ops to vhost lib to enable these VFs to be
used as vhost data path accelerator.

Live migration feature is supported by ifc VF and this driver enables
it based on vhost lib.

Because vDPA driver needs to set up MSI-X vector to interrupt the guest,
only vfio-pci is supported currently.

Signed-off-by: Xiao Wang <xiao.w.w...@intel.com>
Signed-off-by: Rosen Xu <rosen...@intel.com>
---
v2:
- Rebase on Zhihong's vDPA v3 patch set.
---
  config/common_base                      |    6 +
  config/common_linuxapp                  |    1 +
  drivers/net/Makefile                    |    1 +
  drivers/net/ifcvf/Makefile              |   40 +
  drivers/net/ifcvf/base/ifcvf.c          |  329 ++++++++
  drivers/net/ifcvf/base/ifcvf.h          |  156 ++++
  drivers/net/ifcvf/base/ifcvf_osdep.h    |   52 ++
  drivers/net/ifcvf/ifcvf_ethdev.c        | 1240 +++++++++++++++++++++++++++++++
  drivers/net/ifcvf/rte_ifcvf_version.map |    4 +
  mk/rte.app.mk                           |    1 +
  10 files changed, 1830 insertions(+)
  create mode 100644 drivers/net/ifcvf/Makefile
  create mode 100644 drivers/net/ifcvf/base/ifcvf.c
  create mode 100644 drivers/net/ifcvf/base/ifcvf.h
  create mode 100644 drivers/net/ifcvf/base/ifcvf_osdep.h
  create mode 100644 drivers/net/ifcvf/ifcvf_ethdev.c
  create mode 100644 drivers/net/ifcvf/rte_ifcvf_version.map


...

+static int
+eth_dev_ifcvf_create(struct rte_vdev_device *dev,
+               struct rte_pci_addr *pci_addr, int devices)
+{
+       const char *name = rte_vdev_device_name(dev);
+       struct rte_eth_dev *eth_dev = NULL;
+       struct ether_addr *eth_addr = NULL;
+       struct ifcvf_internal *internal = NULL;
+       struct internal_list *list = NULL;
+       struct rte_eth_dev_data *data = NULL;
+       struct rte_pci_addr pf_addr = *pci_addr;
+       int i;
+
+       list = rte_zmalloc_socket(name, sizeof(*list), 0,
+                       dev->device.numa_node);
+       if (list == NULL)
+               goto error;
+
+       /* reserve an ethdev entry */
+       eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internal));
+       if (eth_dev == NULL)
+               goto error;
+
+       eth_addr = rte_zmalloc_socket(name, sizeof(*eth_addr), 0,
+                       dev->device.numa_node);
+       if (eth_addr == NULL)
+               goto error;
+
+       *eth_addr = base_eth_addr;
+       eth_addr->addr_bytes[5] = eth_dev->data->port_id;
+
+       internal = eth_dev->data->dev_private;
+       internal->dev_name = strdup(name);
+       if (internal->dev_name == NULL)
+               goto error;
+
+       internal->eng_addr.pci_addr = *pci_addr;
+       for (i = 0; i < devices; i++) {
+               pf_addr.domain = pci_addr->domain;
+               pf_addr.bus = pci_addr->bus;
+               pf_addr.devid = pci_addr->devid + (i + 1) / 8;
+               pf_addr.function = pci_addr->function + (i + 1) % 8;
+               internal->vf_info[i].pdev.addr = pf_addr;
+               rte_spinlock_init(&internal->vf_info[i].lock);
+       }
+       internal->max_devices = devices;
+
+       list->eth_dev = eth_dev;
+       pthread_mutex_lock(&internal_list_lock);
+       TAILQ_INSERT_TAIL(&internal_list, list, next);
+       pthread_mutex_unlock(&internal_list_lock);
+
+       data = eth_dev->data;
+       data->nb_rx_queues = IFCVF_MAX_QUEUES;
+       data->nb_tx_queues = IFCVF_MAX_QUEUES;
+       data->dev_link = vdpa_link;
+       data->mac_addrs = eth_addr;

We might want one ethernet device per VF, as for example you set
dev_link.link_status to UP as soon as a VF is configured, and DOWN
as when a single VF is removed.

+       data->dev_flags = RTE_ETH_DEV_INTR_LSC;
+       eth_dev->dev_ops = &ops;
+
+       /* assign rx and tx ops, could be used as vDPA fallback */
+       eth_dev->rx_pkt_burst = eth_ifcvf_rx;
+       eth_dev->tx_pkt_burst = eth_ifcvf_tx;
+
+       if (rte_vdpa_register_engine(vdpa_ifcvf_driver.name,
+                               &internal->eng_addr) < 0)
+               goto error;
+
+       return 0;
+
+error:
+       rte_free(list);
+       rte_free(eth_addr);
+       if (internal && internal->dev_name)
+               free(internal->dev_name);
+       rte_free(internal);
+       if (eth_dev)
+               rte_eth_dev_release_port(eth_dev);
+
+       return -1;
+}
+
+static int
+get_pci_addr(const char *key __rte_unused, const char *value, void *extra_args)
+{
+       if (value == NULL || extra_args == NULL)
+               return -1;
+
+       return rte_pci_addr_parse(value, extra_args);
+}
+
+static inline int
+open_int(const char *key __rte_unused, const char *value, void *extra_args)
+{
+       uint16_t *n = extra_args;
+
+       if (value == NULL || extra_args == NULL)
+               return -EINVAL;
+
+       *n = (uint16_t)strtoul(value, NULL, 0);
+       if (*n == USHRT_MAX && errno == ERANGE)
+               return -1;
+
+       return 0;
+}
+
+/*
+ * If this vdev is created by user, then ifcvf will be taken by
+ * this vdev.
+ */
+static int
+ifcvf_take_over(struct rte_pci_addr *pci_addr, int num)
+{
+       uint16_t port_id;
+       int i, ret;
+       char devname[RTE_DEV_NAME_MAX_LEN];
+       struct rte_pci_addr vf_addr = *pci_addr;
+
+       for (i = 0; i < num; i++) {
+               vf_addr.function += i % 8;
+               vf_addr.devid += i / 8;
+               rte_pci_device_name(&vf_addr, devname, RTE_DEV_NAME_MAX_LEN);
+               ret = rte_eth_dev_get_port_by_name(devname, &port_id);
+               if (ret == 0) {
+                       rte_eth_dev_close(port_id);
+                       if (rte_eth_dev_detach(port_id, devname) < 0)
+                               return -1;
+               }
That seems a bit hard.
Shouldn't we at least check the port is not started?

+       }
+
+       return 0;
+}
+
+static int
+rte_ifcvf_probe(struct rte_vdev_device *dev)
+{
+       struct rte_kvargs *kvlist = NULL;
+       int ret = 0;
+       struct rte_pci_addr pci_addr;
+       int devices;
+
+       RTE_LOG(INFO, PMD, "Initializing ifcvf for %s\n",
+                       rte_vdev_device_name(dev));
+
+       kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
+       if (kvlist == NULL)
+               return -1;
+
+       if (rte_kvargs_count(kvlist, ETH_IFCVF_BDF_ARG) == 1) {
+               ret = rte_kvargs_process(kvlist, ETH_IFCVF_BDF_ARG,
+                               &get_pci_addr, &pci_addr);
+               if (ret < 0)
+                       goto out_free;
+
+       } else {
+               ret = -1;
+               goto out_free;
+       }
+
+       if (rte_kvargs_count(kvlist, ETH_IFCVF_DEVICES_ARG) == 1) {
+               ret = rte_kvargs_process(kvlist, ETH_IFCVF_DEVICES_ARG,
+                               &open_int, &devices);
+               if (ret < 0 || devices > IFCVF_MAX_DEVICES)
+                       goto out_free;
+       } else {
+               devices = 1;
+       }
+
+       ret = ifcvf_take_over(&pci_addr, devices);
+       if (ret < 0)
+               goto out_free;
+
+       eth_dev_ifcvf_create(dev, &pci_addr, devices);
+
+out_free:
+       rte_kvargs_free(kvlist);
+       return ret;
+}
+
+static int
+rte_ifcvf_remove(struct rte_vdev_device *dev)
+{
+       const char *name;
+       struct rte_eth_dev *eth_dev = NULL;
+
+       name = rte_vdev_device_name(dev);
+       RTE_LOG(INFO, PMD, "Un-Initializing ifcvf for %s\n", name);
+
+       /* find an ethdev entry */
+       eth_dev = rte_eth_dev_allocated(name);
+       if (eth_dev == NULL)
+               return -ENODEV;
+
+       eth_dev_close(eth_dev);
+       rte_free(eth_dev->data);
+       rte_eth_dev_release_port(eth_dev);
+
+       return 0;
+}
+
+static struct rte_vdev_driver ifcvf_drv = {
+       .probe = rte_ifcvf_probe,
+       .remove = rte_ifcvf_remove,
+};
+
+RTE_PMD_REGISTER_VDEV(net_ifcvf, ifcvf_drv);
+RTE_PMD_REGISTER_ALIAS(net_ifcvf, eth_ifcvf);
+RTE_PMD_REGISTER_PARAM_STRING(net_ifcvf,
+       "bdf=<bdf> "
+       "devices=<int>");
diff --git a/drivers/net/ifcvf/rte_ifcvf_version.map 
b/drivers/net/ifcvf/rte_ifcvf_version.map
new file mode 100644
index 000000000..33d237913
--- /dev/null
+++ b/drivers/net/ifcvf/rte_ifcvf_version.map
@@ -0,0 +1,4 @@
+EXPERIMENTAL {
+
+       local: *;
+};
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 3eb41d176..be5f765e4 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -171,6 +171,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_VDEV_NETVSC_PMD) += 
-lrte_pmd_vdev_netvsc
  _LDLIBS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD)     += -lrte_pmd_virtio
  ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
  _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_VHOST)      += -lrte_pmd_vhost
+_LDLIBS-$(CONFIG_RTE_LIBRTE_IFCVF)          += -lrte_ifcvf
  endif # $(CONFIG_RTE_LIBRTE_VHOST)
  _LDLIBS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD)    += -lrte_pmd_vmxnet3_uio

Reply via email to