Hi Kim,

1) I agree with Alex on the fact I would prefer to have the qemu-side
platform device code separated from PCI device one, both using a separate
generic helper code. Indeed calling functions referencing BAR in the
platform device does not look natural to me;  although I understand the
code reuse rationale. This is also how the kernel side code is laid out.

2) about nr_regions. What is the number of nr_regions do you expect for
platform_devices? bars[] possible overshoot as already mentionned if
nr_regions > 6.

3) I understand you build nr_regions MemoryRegion (each mmaped on the hpa).
However I understand you only attach a single one to the SysBusDevice in
vfio_platform_realize:
sysbus_init_mmio(sbdev, &vdev->bars[0].mmap_mem);  this MemRegion is
assigned a gpa in virt.c? is it the correct understanding? What is the mem
hierarchy that is targetted. In PCI there are subregions. In this device,
there would be a single layer or hierarchy?

4)  vfio_unmap_bars should destroy the platform device mmaped regions
(vdev->mmap_mem and munmap the correspond hva (currently on slow regions
are munmapped and destroyed)

Best Regards

Eric


On 26 February 2014 03:37, Kim Phillips <kim.phill...@linaro.org> wrote:

> We basically add support for the SysBusDevice type in addition to the
> existing PCIDevice support.  This involves taking common code from the
> existing vfio_initfn(), and putting it under a new vfio_find_get_group(),
> that both vfio_initfn() and the new vfio_platform_realize() call.
> Since realize() returns void, unlike PCIDevice's initfn(),
> error codes are moved into the error message text with %m.
> Some exceptions to the PCI path are added for platform devices,
> mostly in the form of early returns, since less setup is needed.
> I chose to reuse VFIODevice's config_size to determine whether
> the device was a PCI device or a platform device, which might
> need to change.
>
> Currently only MMIO access is supported at this time.  It works
> because of qemu's stage 1 translation, but needs to be in stage 2
> in case other entities than the guest OS want to access it.
> A KVM patch to address this is in the works.
>
> The perceived path for future QEMU development is:
>
> - support allocating a variable number of regions
>   - VFIODevice's bars[] become dynamically allocated *regions
>   - VFIOBAR's device fd replaced with parent VFIODevice ptr,
>     to facilitate BAR r/w ops calling vfio_eoi()
> - add support for interrupts
> - verify and test platform dev unmap path
> - test existing PCI path for any regressions
> - add support for creating platform devices on the qemu command line
>   - currently device address specification is hardcoded for test
>     development on Calxeda Midway's fff51000.ethernet device
> - reset is not supported and registration of reset functions is
>   bypassed for platform devices.
>   - there is no standard means of resetting a platform device,
>     unsure if it suffices to be handled at device--VFIO binding time
>
> Signed-off-by: Kim Phillips <kim.phill...@linaro.org>
> ---
>  hw/misc/vfio.c | 180
> ++++++++++++++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 145 insertions(+), 35 deletions(-)
>
> diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
> index 8db182f..eed24db 100644
> --- a/hw/misc/vfio.c
> +++ b/hw/misc/vfio.c
> @@ -32,6 +32,7 @@
>  #include "hw/pci/msi.h"
>  #include "hw/pci/msix.h"
>  #include "hw/pci/pci.h"
> +#include "hw/sysbus.h"
>  #include "qemu-common.h"
>  #include "qemu/error-report.h"
>  #include "qemu/event_notifier.h"
> @@ -166,7 +167,10 @@ typedef struct VFIOMSIXInfo {
>  } VFIOMSIXInfo;
>
>  typedef struct VFIODevice {
> -    PCIDevice pdev;
> +    union {
> +        PCIDevice pdev;
> +        SysBusDevice sbdev;
> +    };
>      int fd;
>      VFIOINTx intx;
>      unsigned int config_size;
> @@ -180,6 +184,8 @@ typedef struct VFIODevice {
>      VFIOMSIXInfo *msix;
>      int nr_vectors; /* Number of MSI/MSIX vectors currently in use */
>      int interrupt; /* Current interrupt type */
> +    char *name;  /* platform device name, e.g., fff51000.ethernet */
> +    int nr_regions; /* platform devices' number of regions */
>      VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
>      VFIOVGA vga; /* 0xa0000, 0x3b0, 0x3c0 */
>      PCIHostDeviceAddress host;
> @@ -2497,8 +2503,6 @@ empty_region:
>          memory_region_init(submem, OBJECT(vdev), name, 0);
>      }
>
> -    memory_region_add_subregion(mem, offset, submem);
> -
>      return ret;
>  }
>
> @@ -2552,6 +2556,7 @@ static void vfio_map_bar(VFIODevice *vdev, int nr)
>                        &bar->mmap_mem, &bar->mmap, size, 0, name)) {
>          error_report("%s unsupported. Performance may be slow", name);
>      }
> +    memory_region_add_subregion(&bar->mem, 0, &bar->mmap_mem);
>
>      if (vdev->msix && vdev->msix->table_bar == nr) {
>          unsigned start;
> @@ -2566,15 +2571,38 @@ static void vfio_map_bar(VFIODevice *vdev, int nr)
>                            &vdev->msix->mmap, size, start, name)) {
>              error_report("%s unsupported. Performance may be slow", name);
>          }
> +        memory_region_add_subregion(&bar->mem, start,
> &vdev->msix->mmap_mem);
>      }
>
>      vfio_bar_quirk_setup(vdev, nr);
>  }
>
> +static void vfio_map_region(VFIODevice *vdev, int nr)
> +{
> +    VFIOBAR *bar = &vdev->bars[nr];
> +    unsigned size = bar->size;
> +    char name[64];
> +
> +    snprintf(name, sizeof(name), "VFIO %s region %d mmap", vdev->name,
> nr);
> +
> +    if (vfio_mmap_bar(vdev, bar, &bar->mem,
> +                      &bar->mmap_mem, &bar->mmap, size, 0, name)) {
> +        error_report("error mmapping %s: %m", name);
> +    }
> +}
> +
>  static void vfio_map_bars(VFIODevice *vdev)
>  {
>      int i;
>
> +    if (!vdev->config_size) {
> +        /* platform device */
> +        for (i = 0; i < vdev->nr_regions; i++) {
> +            vfio_map_region(vdev, i);
> +        }
> +        return;
> +    }
> +
>      for (i = 0; i < PCI_ROM_SLOT; i++) {
>          vfio_map_bar(vdev, i);
>      }
> @@ -3144,7 +3172,8 @@ static void vfio_pci_reset_handler(void *opaque)
>
>      QLIST_FOREACH(group, &group_list, next) {
>          QLIST_FOREACH(vdev, &group->device_list, next) {
> -            if (vdev->needs_reset) {
> +            /* HACK: restrict reset to PCI devices (have config_size) for
> now */
> +            if (vdev->config_size && vdev->needs_reset) {
>                  vfio_pci_hot_reset_multi(vdev);
>              }
>          }
> @@ -3418,25 +3447,18 @@ static int vfio_get_device(VFIOGroup *group, const
> char *name, VFIODevice *vdev)
>      DPRINTF("Device %s flags: %u, regions: %u, irgs: %u\n", name,
>              dev_info.flags, dev_info.num_regions, dev_info.num_irqs);
>
> -    if (!(dev_info.flags & VFIO_DEVICE_FLAGS_PCI)) {
> -        error_report("vfio: Um, this isn't a PCI device");
> -        goto error;
> -    }
> -
> +    vdev->nr_regions = dev_info.num_regions;
>      vdev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET);
>
> -    if (dev_info.num_regions < VFIO_PCI_CONFIG_REGION_INDEX + 1) {
> +    if (dev_info.num_regions > PCI_NUM_REGIONS) ||
> +        ((dev_info.flags & VFIO_DEVICE_FLAGS_PCI) &&
> +         (dev_info.num_regions < VFIO_PCI_CONFIG_REGION_INDEX + 1)) {
>          error_report("vfio: unexpected number of io regions %u",
>                       dev_info.num_regions);
>          goto error;
>      }
>
> -    if (dev_info.num_irqs < VFIO_PCI_MSIX_IRQ_INDEX + 1) {
> -        error_report("vfio: unexpected number of irqs %u",
> dev_info.num_irqs);
> -        goto error;
> -    }
> -
> -    for (i = VFIO_PCI_BAR0_REGION_INDEX; i < VFIO_PCI_ROM_REGION_INDEX;
> i++) {
> +    for (i = 0; i < dev_info.num_regions; i++) {
>          reg_info.index = i;
>
>          ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);
> @@ -3458,6 +3480,15 @@ static int vfio_get_device(VFIOGroup *group, const
> char *name, VFIODevice *vdev)
>          QLIST_INIT(&vdev->bars[i].quirks);
>      }
>
> +    /* following is for PCI devices, at least for now */
> +    if (!(dev_info.flags & VFIO_DEVICE_FLAGS_PCI))
> +        return 0;
> +
> +    if (dev_info.num_irqs < VFIO_PCI_MSIX_IRQ_INDEX + 1) {
> +        error_report("vfio: unexpected number of irqs %u",
> dev_info.num_irqs);
> +        goto error;
> +    }
> +
>      reg_info.index = VFIO_PCI_CONFIG_REGION_INDEX;
>
>      ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);
> @@ -3659,32 +3690,25 @@ static void
> vfio_unregister_err_notifier(VFIODevice *vdev)
>      event_notifier_cleanup(&vdev->err_notifier);
>  }
>
> -static int vfio_initfn(PCIDevice *pdev)
> +static VFIOGroup *vfio_find_get_group(char *path)
>  {
> -    VFIODevice *pvdev, *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
>      VFIOGroup *group;
> -    char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name;
> +    char iommu_group_path[PATH_MAX], *group_name;
>      ssize_t len;
>      struct stat st;
>      int groupid;
> -    int ret;
>
> -    /* Check that the host device exists */
> -    snprintf(path, sizeof(path),
> -             "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/",
> -             vdev->host.domain, vdev->host.bus, vdev->host.slot,
> -             vdev->host.function);
>      if (stat(path, &st) < 0) {
> -        error_report("vfio: error: no such host device: %s", path);
> -        return -errno;
> +        error_report("vfio: error: no such host device: %s: %m", path);
> +        return NULL;
>      }
>
>      strncat(path, "iommu_group", sizeof(path) - strlen(path) - 1);
>
>      len = readlink(path, iommu_group_path, PATH_MAX);
>      if (len <= 0) {
> -        error_report("vfio: error no iommu_group for device");
> -        return -errno;
> +        error_report("vfio: error no iommu_group for device: %m");
> +        return NULL;
>      }
>
>      iommu_group_path[len] = 0;
> @@ -3692,18 +3716,37 @@ static int vfio_initfn(PCIDevice *pdev)
>
>      if (sscanf(group_name, "%d", &groupid) != 1) {
>          error_report("vfio: error reading %s: %m", path);
> -        return -errno;
> +        return NULL;
>      }
>
> -    DPRINTF("%s(%04x:%02x:%02x.%x) group %d\n", __func__,
> vdev->host.domain,
> -            vdev->host.bus, vdev->host.slot, vdev->host.function,
> groupid);
> -
>      group = vfio_get_group(groupid);
>      if (!group) {
> -        error_report("vfio: failed to get group %d", groupid);
> -        return -ENOENT;
> +        error_report("vfio: failed to get group %d: %m", groupid);
> +        return NULL;
>      }
>
> +    return group;
> +}
> +
> +static int vfio_initfn(PCIDevice *pdev)
> +{
> +    VFIODevice *pvdev, *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
> +    VFIOGroup *group;
> +    char path[PATH_MAX];
> +    int ret;
> +
> +    /* Check that the host device exists */
> +    snprintf(path, sizeof(path),
> +             "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/",
> +             vdev->host.domain, vdev->host.bus, vdev->host.slot,
> +             vdev->host.function);
> +
> +    group = vfio_find_get_group(path);
> +
> +    DPRINTF("%s(%04x:%02x:%02x.%x) group %d\n", __func__,
> vdev->host.domain,
> +            vdev->host.bus, vdev->host.slot, vdev->host.function,
> +            group->groupid);
> +
>      snprintf(path, sizeof(path), "%04x:%02x:%02x.%01x",
>              vdev->host.domain, vdev->host.bus, vdev->host.slot,
>              vdev->host.function);
> @@ -3916,3 +3959,70 @@ static void register_vfio_pci_dev_type(void)
>  }
>
>  type_init(register_vfio_pci_dev_type)
> +
> +
> +/*
> + * VFIO platform devices
> + */
> +static void vfio_platform_realize(DeviceState *dev, Error **errp)
> +{
> +    SysBusDevice *sbdev = SYS_BUS_DEVICE(dev);
> +    VFIODevice *vdev = DO_UPCAST(VFIODevice, sbdev, sbdev);
> +    VFIOGroup *group;
> +    char path[PATH_MAX];
> +    int ret;
> +
> +    vdev->name = malloc(PATH_MAX);
> +    strcpy(vdev->name, "fff51000.ethernet");
> +
> +    /* Check that the host device exists */
> +    snprintf(path, sizeof(path),
> +             "/sys/bus/platform/devices/%s/", vdev->name);
> +
> +    group = vfio_find_get_group(path);
> +    if (!group) {
> +        error_report("vfio: failed to get group for device %s", path);
> +        return;
> +    }
> +
> +    strcpy(path, vdev->name);
> +    ret = vfio_get_device(group, path, vdev);
> +    if (ret) {
> +        error_report("vfio: failed to get device %s", path);
> +        vfio_put_group(group);
> +        return;
> +    }
> +
> +    vfio_map_bars(vdev);
> +
> +    sysbus_init_mmio(sbdev, &vdev->bars[0].mmap_mem);
> +}
> +
> +static const VMStateDescription vfio_platform_vmstate = {
> +    .name = "vfio-platform",
> +    .unmigratable = 1,
> +};
> +
> +static void vfio_platform_dev_class_init(ObjectClass *klass, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(klass);
> +
> +    dc->realize = vfio_platform_realize;
> +    dc->vmsd = &vfio_platform_vmstate;
> +    dc->desc = "VFIO-based platform device assignment";
> +    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
> +}
> +
> +static const TypeInfo vfio_platform_dev_info = {
> +    .name = "vfio-platform",
> +    .parent = TYPE_SYS_BUS_DEVICE,
> +    .instance_size = sizeof(VFIODevice),
> +    .class_init = vfio_platform_dev_class_init,
> +};
> +
> +static void register_vfio_platform_dev_type(void)
> +{
> +    type_register_static(&vfio_platform_dev_info);
> +}
> +
> +type_init(register_vfio_platform_dev_type)
> --
> 1.9.0
>
>

Reply via email to