* Zhao, Yu <[EMAIL PROTECTED]>:
> Support SR-IOV capability. By default, this feature is not enabled and the
> SR-IOV device behaves as traditional PCI device. After it's enabled, each
> Virtual Function's PCI configuration space can be accessed using its own Bus,
> Device and Function Number (Routing ID). Each Virtual Function also has PCI
> Memory Space, which is used to map its own register set.
>
> Signed-off-by: Yu Zhao <[EMAIL PROTECTED]>
> Signed-off-by: Eddie Dong <[EMAIL PROTECTED]>
>
> ---
> drivers/pci/Kconfig | 10 +
> drivers/pci/Makefile | 2 +
> drivers/pci/iov.c | 555 ++++++++++++++++++++++++++++++++++++++++++++++
> drivers/pci/pci.c | 14 +-
> drivers/pci/pci.h | 44 ++++
> drivers/pci/probe.c | 5 +
> include/linux/pci.h | 28 +++
> include/linux/pci_regs.h | 20 ++
> 8 files changed, 677 insertions(+), 1 deletions(-)
> create mode 100644 drivers/pci/iov.c
>
> diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
> index f43cc46..0a1fe01 100644
> --- a/drivers/pci/Kconfig
> +++ b/drivers/pci/Kconfig
> @@ -57,3 +57,13 @@ config PCI_ARI
> default n
> help
> This enables PCI Alternative Routing-ID Interpretation.
> +
> +config PCI_IOV
> + bool "PCI SR-IOV support"
> + depends on PCI && HOTPLUG
> + select PCI_MSI
> + select PCI_ARI
> + select HOTPLUG_PCI
> + default n
> + help
> + This allows device drivers to enable Single Root I/O Virtualization.
> diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
> index 96f2767..2dcefce 100644
> --- a/drivers/pci/Makefile
> +++ b/drivers/pci/Makefile
> @@ -55,3 +55,5 @@ EXTRA_CFLAGS += -DDEBUG
> endif
>
> obj-$(CONFIG_PCI_ARI) += ari.o
> +
> +obj-$(CONFIG_PCI_IOV) += iov.o
> diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
> new file mode 100644
> index 0000000..0656655
> --- /dev/null
> +++ b/drivers/pci/iov.c
> @@ -0,0 +1,555 @@
> +/*
> + * drivers/pci/iov.c
> + *
> + * Copyright (C) 2008 Intel Corporation, Yu Zhao <[EMAIL PROTECTED]>
> + *
> + * PCI Express Single Root I/O Virtualization capability support.
> + */
> +
> +#include <linux/ctype.h>
> +#include <linux/string.h>
> +#include <linux/pci.h>
> +#include <linux/pci_hotplug.h>
> +#include <linux/delay.h>
> +#include <asm/page.h>
> +
> +#include "pci.h"
> +
> +
> +#define PCI_IOV_SLOTNAME_LEN 24
> +
> +#define notify(dev, event, id, param) ({ \
> + dev->iov->cb ? dev->iov->cb(dev, event, id, param) : 0; \
> +})
> +
> +
> +struct virtfn_slot {
> + int id;
> + char name[PCI_IOV_SLOTNAME_LEN];
> + struct pci_dev *dev;
> + struct list_head node;
> + struct hotplug_slot *slot;
> +};
> +
> +static int enable_virtfn(struct hotplug_slot *);
> +static int disable_virtfn(struct hotplug_slot *);
> +static int set_virtfn_param(struct hotplug_slot *, const char *, int);
> +static int get_virtfn_param(struct hotplug_slot *, const char **);
> +
> +static struct hotplug_slot_ops virtfn_slot_ops = {
> + .owner = THIS_MODULE,
> + .enable_slot = enable_virtfn,
> + .disable_slot = disable_virtfn,
> + .set_param = set_virtfn_param,
> + .get_param = get_virtfn_param
> +};
> +
> +static DEFINE_MUTEX(iov_lock);
> +
> +
> +static inline void get_addr(struct pci_dev *dev, int id, u8 *busnr, u8 *devfn)
> +{
> + u16 addr;
> +
> + addr = (dev->bus->number << 8) + dev->devfn +
> + dev->iov->offset + dev->iov->stride * id;
> + *busnr = addr >> 8;
> + *devfn = addr & 0xff;
> +}
> +
> +static inline struct pci_bus *find_bus(struct pci_dev *dev, int busnr)
> +{
> + struct pci_bus *bus;
> +
> + down_read(&pci_bus_sem);
> + list_for_each_entry(bus, &dev->bus->children, node)
> + if (bus->number == busnr) {
> + up_read(&pci_bus_sem);
> + return bus;
> + }
> + up_read(&pci_bus_sem);
> +
> + return NULL;
> +}
> +
> +static int alloc_virtfn(struct pci_dev *dev, int id)
> +{
> + int i;
> + int rc;
> + u8 busnr, devfn;
> + unsigned long size;
> + struct pci_dev *new;
> + struct pci_bus *bus;
> + struct resource *res;
> +
> + get_addr(dev, id, &busnr, &devfn);
> +
> + new = alloc_pci_dev();
> + if (!new)
> + return -ENOMEM;
> +
> + bus = find_bus(dev, busnr);
> + BUG_ON(!bus);
> + new->bus = bus;
> + new->sysdata = bus->sysdata;
> + new->dev.parent = dev->dev.parent;
> + new->dev.bus = dev->dev.bus;
> + new->devfn = devfn;
> + new->hdr_type = PCI_HEADER_TYPE_NORMAL;
> + new->multifunction = 0;
> + new->vendor = dev->vendor;
> + pci_read_config_word(dev, dev->iov->cap + PCI_IOV_VF_DID, &new->device);
> + new->cfg_size = 4096;
> + new->error_state = pci_channel_io_normal;
> + new->pcie_type = PCI_EXP_TYPE_ENDPOINT;
> + new->dma_mask = 0xffffffff;
> +
> + dev_set_name(&new->dev, "%04x:%02x:%02x.%d", pci_domain_nr(bus),
> + busnr, PCI_SLOT(devfn), PCI_FUNC(devfn));
> +
> + pci_read_config_byte(new, PCI_REVISION_ID, &new->revision);
> + new->class = dev->class;
> + new->current_state = PCI_UNKNOWN;
> + new->irq = 0;
> +
> + for (i = 0; i < PCI_IOV_NUM_BAR; i++) {
> + res = dev->resource + PCI_IOV_RESOURCES + i;
> + if (!res->parent)
> + continue;
> + new->resource[i].name = pci_name(new);
> + new->resource[i].flags = res->flags;
> + size = resource_size(res) / dev->iov->total;
> + new->resource[i].start = res->start + size * id;
> + new->resource[i].end = new->resource[i].start + size - 1;
> + rc = request_resource(res, &new->resource[i]);
> + BUG_ON(rc);
> + }
> +
> + new->subsystem_vendor = dev->subsystem_vendor;
> + pci_read_config_word(new, PCI_SUBSYSTEM_ID, &new->subsystem_device);
> +
> + pci_device_add(new, bus);
> + return pci_bus_add_device(new);
> +}
> +
> +static int enable_virtfn(struct hotplug_slot *slot)
> +{
> + int rc;
> + u8 busnr, devfn;
> + struct pci_dev *dev;
> + struct virtfn_slot *vslot = slot->private;
> +
> + get_addr(vslot->dev, vslot->id, &busnr, &devfn);
> +
> + mutex_lock(&iov_lock);
> + dev = pci_get_bus_and_slot(busnr, devfn);
> + if (dev) {
> + pci_dev_put(dev);
> + rc = -EINVAL;
> + goto out;
> + }
> +
> + rc = notify(vslot->dev, PCI_IOV_VF_ENABLE,
> + vslot->id, vslot->slot->info->param);
> + if (rc)
> + goto out;
> +
> + rc = alloc_virtfn(vslot->dev, vslot->id);
> + if (!rc)
> + slot->info->power_status = 1;
> +out:
> + mutex_unlock(&iov_lock);
> +
> + return rc;
> +}
> +
> +static int disable_virtfn(struct hotplug_slot *slot)
> +{
> + int rc;
> + u8 busnr, devfn;
> + struct pci_dev *dev;
> + struct virtfn_slot *vslot = slot->private;
> +
> + get_addr(vslot->dev, vslot->id, &busnr, &devfn);
> +
> + mutex_lock(&iov_lock);
> + dev = pci_get_bus_and_slot(busnr, devfn);
> + if (!dev) {
> + rc = -ENODEV;
> + goto out;
> + }
> +
> + pci_dev_put(dev);
> + pci_remove_bus_device(dev);
> + rc = notify(vslot->dev, PCI_IOV_VF_DISABLE, vslot->id, NULL);
> + slot->info->power_status = 0;
> +out:
> + mutex_unlock(&iov_lock);
> +
> + return rc;
> +}
> +
> +static int set_virtfn_param(struct hotplug_slot *slot, const char *buf, int len)
> +{
> + int rc;
> + struct virtfn_slot *vslot = slot->private;
> +
> + if (len > PCI_IOV_PARAM_LEN)
> + return -E2BIG;
> +
> + strcpy(slot->info->param, buf);
> + rc = notify(vslot->dev, PCI_IOV_VF_SETPARAM,
> + vslot->id, vslot->slot->info->param);
> + if (rc)
> + memset(slot->info->param, 0, PCI_IOV_PARAM_LEN);
> +
> + return rc;
> +}
> +
> +static int get_virtfn_param(struct hotplug_slot *slot, const char **param)
> +{
> + int rc;
> + struct virtfn_slot *vslot = slot->private;
> +
> + rc = notify(vslot->dev, PCI_IOV_VF_GETPARAM,
> + vslot->id, vslot->slot->info->param);
> + if (!rc)
> + *param = slot->info->param;
> +
> + return rc;
> +}
> +
> +static void remove_slot(struct hotplug_slot *slot)
> +{
> + struct virtfn_slot *vslot = slot->private;
> +
> + disable_virtfn(slot);
> + pci_dev_put(vslot->dev);
> + list_del(&vslot->node);
> + kfree(slot->info->param);
> + kfree(slot->info);
> + kfree(slot);
> + kfree(vslot);
> +}
> +
> +static int add_slot(struct pci_dev *dev, int id)
> +{
> + int rc = -ENOMEM;
> + u8 busnr, devfn;
> + struct pci_bus *bus;
> + struct hotplug_slot *slot;
> + struct virtfn_slot *vslot;
> +
> + slot = kzalloc(sizeof(*slot), GFP_KERNEL);
> + if (!slot)
> + return rc;
> +
> + slot->info = kzalloc(sizeof(*slot->info), GFP_KERNEL);
> + if (!slot->info)
> + goto failed1;
> +
> + slot->info->param = kzalloc(PCI_IOV_PARAM_LEN, GFP_KERNEL);
> + if (!slot->info)
> + goto failed2;
> +
> + vslot = kzalloc(sizeof(*vslot), GFP_KERNEL);
> + if (!vslot)
> + goto failed3;
> +
> + slot->name = vslot->name;
> + sprintf(slot->name, "%s-iov-%04x", pci_name(dev), id);
> + slot->ops = &virtfn_slot_ops;
> + slot->release = &remove_slot;
> + slot->private = vslot;
> + vslot->id = id;
> + vslot->dev = pci_dev_get(dev);
> + vslot->slot = slot;
> +
> + get_addr(dev, id, &busnr, &devfn);
> + bus = find_bus(dev, busnr);
> + BUG_ON(!bus);
> +
> + /* use device and function # as slot # */
> + rc = pci_hp_register(slot, bus, devfn);
> + if (rc)
> + goto failed4;
So, what happens if another hotplug driver is already loaded?
I don't know the SR-IOV spec well enough to know whether you are
allowed to combine SR-IOV with some other form of hotplug, such as
ACPI or native PCIe hotplug.
Today, pci_hp_register() will return -EBUSY in that case.
If SR-IOV really doesn't have anything to do with hotplug, then
it may be a candidate for calling pci_create_slot() directly
instead of going through pci_hp_register(). In that case, 'param'
should be a property of a generic PCI slot rather than of a
hotplug slot.
Thanks.
/ac
_______________________________________________
Virtualization mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/virtualization