Hi Kim, 1) I agree with Alex on the fact I would prefer to have the qemu-side platform device code separated from PCI device one, both using a separate generic helper code. Indeed calling functions referencing BAR in the platform device does not look natural to me; although I understand the code reuse rationale. This is also how the kernel side code is laid out.
2) about nr_regions. What is the number of nr_regions do you expect for platform_devices? bars[] possible overshoot as already mentionned if nr_regions > 6. 3) I understand you build nr_regions MemoryRegion (each mmaped on the hpa). However I understand you only attach a single one to the SysBusDevice in vfio_platform_realize: sysbus_init_mmio(sbdev, &vdev->bars[0].mmap_mem); this MemRegion is assigned a gpa in virt.c? is it the correct understanding? What is the mem hierarchy that is targetted. In PCI there are subregions. In this device, there would be a single layer or hierarchy? 4) vfio_unmap_bars should destroy the platform device mmaped regions (vdev->mmap_mem and munmap the correspond hva (currently on slow regions are munmapped and destroyed) Best Regards Eric On 26 February 2014 03:37, Kim Phillips <kim.phill...@linaro.org> wrote: > We basically add support for the SysBusDevice type in addition to the > existing PCIDevice support. This involves taking common code from the > existing vfio_initfn(), and putting it under a new vfio_find_get_group(), > that both vfio_initfn() and the new vfio_platform_realize() call. > Since realize() returns void, unlike PCIDevice's initfn(), > error codes are moved into the error message text with %m. > Some exceptions to the PCI path are added for platform devices, > mostly in the form of early returns, since less setup is needed. > I chose to reuse VFIODevice's config_size to determine whether > the device was a PCI device or a platform device, which might > need to change. > > Currently only MMIO access is supported at this time. It works > because of qemu's stage 1 translation, but needs to be in stage 2 > in case other entities than the guest OS want to access it. > A KVM patch to address this is in the works. > > The perceived path for future QEMU development is: > > - support allocating a variable number of regions > - VFIODevice's bars[] become dynamically allocated *regions > - VFIOBAR's device fd replaced with parent VFIODevice ptr, > to facilitate BAR r/w ops calling vfio_eoi() > - add support for interrupts > - verify and test platform dev unmap path > - test existing PCI path for any regressions > - add support for creating platform devices on the qemu command line > - currently device address specification is hardcoded for test > development on Calxeda Midway's fff51000.ethernet device > - reset is not supported and registration of reset functions is > bypassed for platform devices. > - there is no standard means of resetting a platform device, > unsure if it suffices to be handled at device--VFIO binding time > > Signed-off-by: Kim Phillips <kim.phill...@linaro.org> > --- > hw/misc/vfio.c | 180 > ++++++++++++++++++++++++++++++++++++++++++++++----------- > 1 file changed, 145 insertions(+), 35 deletions(-) > > diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c > index 8db182f..eed24db 100644 > --- a/hw/misc/vfio.c > +++ b/hw/misc/vfio.c > @@ -32,6 +32,7 @@ > #include "hw/pci/msi.h" > #include "hw/pci/msix.h" > #include "hw/pci/pci.h" > +#include "hw/sysbus.h" > #include "qemu-common.h" > #include "qemu/error-report.h" > #include "qemu/event_notifier.h" > @@ -166,7 +167,10 @@ typedef struct VFIOMSIXInfo { > } VFIOMSIXInfo; > > typedef struct VFIODevice { > - PCIDevice pdev; > + union { > + PCIDevice pdev; > + SysBusDevice sbdev; > + }; > int fd; > VFIOINTx intx; > unsigned int config_size; > @@ -180,6 +184,8 @@ typedef struct VFIODevice { > VFIOMSIXInfo *msix; > int nr_vectors; /* Number of MSI/MSIX vectors currently in use */ > int interrupt; /* Current interrupt type */ > + char *name; /* platform device name, e.g., fff51000.ethernet */ > + int nr_regions; /* platform devices' number of regions */ > VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */ > VFIOVGA vga; /* 0xa0000, 0x3b0, 0x3c0 */ > PCIHostDeviceAddress host; > @@ -2497,8 +2503,6 @@ empty_region: > memory_region_init(submem, OBJECT(vdev), name, 0); > } > > - memory_region_add_subregion(mem, offset, submem); > - > return ret; > } > > @@ -2552,6 +2556,7 @@ static void vfio_map_bar(VFIODevice *vdev, int nr) > &bar->mmap_mem, &bar->mmap, size, 0, name)) { > error_report("%s unsupported. Performance may be slow", name); > } > + memory_region_add_subregion(&bar->mem, 0, &bar->mmap_mem); > > if (vdev->msix && vdev->msix->table_bar == nr) { > unsigned start; > @@ -2566,15 +2571,38 @@ static void vfio_map_bar(VFIODevice *vdev, int nr) > &vdev->msix->mmap, size, start, name)) { > error_report("%s unsupported. Performance may be slow", name); > } > + memory_region_add_subregion(&bar->mem, start, > &vdev->msix->mmap_mem); > } > > vfio_bar_quirk_setup(vdev, nr); > } > > +static void vfio_map_region(VFIODevice *vdev, int nr) > +{ > + VFIOBAR *bar = &vdev->bars[nr]; > + unsigned size = bar->size; > + char name[64]; > + > + snprintf(name, sizeof(name), "VFIO %s region %d mmap", vdev->name, > nr); > + > + if (vfio_mmap_bar(vdev, bar, &bar->mem, > + &bar->mmap_mem, &bar->mmap, size, 0, name)) { > + error_report("error mmapping %s: %m", name); > + } > +} > + > static void vfio_map_bars(VFIODevice *vdev) > { > int i; > > + if (!vdev->config_size) { > + /* platform device */ > + for (i = 0; i < vdev->nr_regions; i++) { > + vfio_map_region(vdev, i); > + } > + return; > + } > + > for (i = 0; i < PCI_ROM_SLOT; i++) { > vfio_map_bar(vdev, i); > } > @@ -3144,7 +3172,8 @@ static void vfio_pci_reset_handler(void *opaque) > > QLIST_FOREACH(group, &group_list, next) { > QLIST_FOREACH(vdev, &group->device_list, next) { > - if (vdev->needs_reset) { > + /* HACK: restrict reset to PCI devices (have config_size) for > now */ > + if (vdev->config_size && vdev->needs_reset) { > vfio_pci_hot_reset_multi(vdev); > } > } > @@ -3418,25 +3447,18 @@ static int vfio_get_device(VFIOGroup *group, const > char *name, VFIODevice *vdev) > DPRINTF("Device %s flags: %u, regions: %u, irgs: %u\n", name, > dev_info.flags, dev_info.num_regions, dev_info.num_irqs); > > - if (!(dev_info.flags & VFIO_DEVICE_FLAGS_PCI)) { > - error_report("vfio: Um, this isn't a PCI device"); > - goto error; > - } > - > + vdev->nr_regions = dev_info.num_regions; > vdev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET); > > - if (dev_info.num_regions < VFIO_PCI_CONFIG_REGION_INDEX + 1) { > + if (dev_info.num_regions > PCI_NUM_REGIONS) || > + ((dev_info.flags & VFIO_DEVICE_FLAGS_PCI) && > + (dev_info.num_regions < VFIO_PCI_CONFIG_REGION_INDEX + 1)) { > error_report("vfio: unexpected number of io regions %u", > dev_info.num_regions); > goto error; > } > > - if (dev_info.num_irqs < VFIO_PCI_MSIX_IRQ_INDEX + 1) { > - error_report("vfio: unexpected number of irqs %u", > dev_info.num_irqs); > - goto error; > - } > - > - for (i = VFIO_PCI_BAR0_REGION_INDEX; i < VFIO_PCI_ROM_REGION_INDEX; > i++) { > + for (i = 0; i < dev_info.num_regions; i++) { > reg_info.index = i; > > ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, ®_info); > @@ -3458,6 +3480,15 @@ static int vfio_get_device(VFIOGroup *group, const > char *name, VFIODevice *vdev) > QLIST_INIT(&vdev->bars[i].quirks); > } > > + /* following is for PCI devices, at least for now */ > + if (!(dev_info.flags & VFIO_DEVICE_FLAGS_PCI)) > + return 0; > + > + if (dev_info.num_irqs < VFIO_PCI_MSIX_IRQ_INDEX + 1) { > + error_report("vfio: unexpected number of irqs %u", > dev_info.num_irqs); > + goto error; > + } > + > reg_info.index = VFIO_PCI_CONFIG_REGION_INDEX; > > ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, ®_info); > @@ -3659,32 +3690,25 @@ static void > vfio_unregister_err_notifier(VFIODevice *vdev) > event_notifier_cleanup(&vdev->err_notifier); > } > > -static int vfio_initfn(PCIDevice *pdev) > +static VFIOGroup *vfio_find_get_group(char *path) > { > - VFIODevice *pvdev, *vdev = DO_UPCAST(VFIODevice, pdev, pdev); > VFIOGroup *group; > - char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name; > + char iommu_group_path[PATH_MAX], *group_name; > ssize_t len; > struct stat st; > int groupid; > - int ret; > > - /* Check that the host device exists */ > - snprintf(path, sizeof(path), > - "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/", > - vdev->host.domain, vdev->host.bus, vdev->host.slot, > - vdev->host.function); > if (stat(path, &st) < 0) { > - error_report("vfio: error: no such host device: %s", path); > - return -errno; > + error_report("vfio: error: no such host device: %s: %m", path); > + return NULL; > } > > strncat(path, "iommu_group", sizeof(path) - strlen(path) - 1); > > len = readlink(path, iommu_group_path, PATH_MAX); > if (len <= 0) { > - error_report("vfio: error no iommu_group for device"); > - return -errno; > + error_report("vfio: error no iommu_group for device: %m"); > + return NULL; > } > > iommu_group_path[len] = 0; > @@ -3692,18 +3716,37 @@ static int vfio_initfn(PCIDevice *pdev) > > if (sscanf(group_name, "%d", &groupid) != 1) { > error_report("vfio: error reading %s: %m", path); > - return -errno; > + return NULL; > } > > - DPRINTF("%s(%04x:%02x:%02x.%x) group %d\n", __func__, > vdev->host.domain, > - vdev->host.bus, vdev->host.slot, vdev->host.function, > groupid); > - > group = vfio_get_group(groupid); > if (!group) { > - error_report("vfio: failed to get group %d", groupid); > - return -ENOENT; > + error_report("vfio: failed to get group %d: %m", groupid); > + return NULL; > } > > + return group; > +} > + > +static int vfio_initfn(PCIDevice *pdev) > +{ > + VFIODevice *pvdev, *vdev = DO_UPCAST(VFIODevice, pdev, pdev); > + VFIOGroup *group; > + char path[PATH_MAX]; > + int ret; > + > + /* Check that the host device exists */ > + snprintf(path, sizeof(path), > + "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/", > + vdev->host.domain, vdev->host.bus, vdev->host.slot, > + vdev->host.function); > + > + group = vfio_find_get_group(path); > + > + DPRINTF("%s(%04x:%02x:%02x.%x) group %d\n", __func__, > vdev->host.domain, > + vdev->host.bus, vdev->host.slot, vdev->host.function, > + group->groupid); > + > snprintf(path, sizeof(path), "%04x:%02x:%02x.%01x", > vdev->host.domain, vdev->host.bus, vdev->host.slot, > vdev->host.function); > @@ -3916,3 +3959,70 @@ static void register_vfio_pci_dev_type(void) > } > > type_init(register_vfio_pci_dev_type) > + > + > +/* > + * VFIO platform devices > + */ > +static void vfio_platform_realize(DeviceState *dev, Error **errp) > +{ > + SysBusDevice *sbdev = SYS_BUS_DEVICE(dev); > + VFIODevice *vdev = DO_UPCAST(VFIODevice, sbdev, sbdev); > + VFIOGroup *group; > + char path[PATH_MAX]; > + int ret; > + > + vdev->name = malloc(PATH_MAX); > + strcpy(vdev->name, "fff51000.ethernet"); > + > + /* Check that the host device exists */ > + snprintf(path, sizeof(path), > + "/sys/bus/platform/devices/%s/", vdev->name); > + > + group = vfio_find_get_group(path); > + if (!group) { > + error_report("vfio: failed to get group for device %s", path); > + return; > + } > + > + strcpy(path, vdev->name); > + ret = vfio_get_device(group, path, vdev); > + if (ret) { > + error_report("vfio: failed to get device %s", path); > + vfio_put_group(group); > + return; > + } > + > + vfio_map_bars(vdev); > + > + sysbus_init_mmio(sbdev, &vdev->bars[0].mmap_mem); > +} > + > +static const VMStateDescription vfio_platform_vmstate = { > + .name = "vfio-platform", > + .unmigratable = 1, > +}; > + > +static void vfio_platform_dev_class_init(ObjectClass *klass, void *data) > +{ > + DeviceClass *dc = DEVICE_CLASS(klass); > + > + dc->realize = vfio_platform_realize; > + dc->vmsd = &vfio_platform_vmstate; > + dc->desc = "VFIO-based platform device assignment"; > + set_bit(DEVICE_CATEGORY_MISC, dc->categories); > +} > + > +static const TypeInfo vfio_platform_dev_info = { > + .name = "vfio-platform", > + .parent = TYPE_SYS_BUS_DEVICE, > + .instance_size = sizeof(VFIODevice), > + .class_init = vfio_platform_dev_class_init, > +}; > + > +static void register_vfio_platform_dev_type(void) > +{ > + type_register_static(&vfio_platform_dev_info); > +} > + > +type_init(register_vfio_platform_dev_type) > -- > 1.9.0 > >