Hi Alex, On 03/09/2016 08:53 PM, Alex Williamson wrote: > Both platform and PCI vfio drivers create a "slow", I/O memory region > with one or more mmap memory regions overlayed when supported by the > device. Generalize this to a set of common helpers in the core that > pulls the region info from vfio, fills the region data, configures > slow mapping, and adds helpers for comleting the mmap, enable/disable, > and teardown. This can be immediately used by the PCI MSI-X code, > which needs to mmap around the MSI-X vector table. > > This also changes VFIORegion.mem to be dynamically allocated because > otherwise we don't know how the caller has allocated VFIORegion and > therefore don't know whether to unreference it to destroy the > MemoryRegion or not. > > Signed-off-by: Alex Williamson <alex.william...@redhat.com> > --- > hw/arm/sysbus-fdt.c | 4 - > hw/vfio/common.c | 172 > ++++++++++++++++++++++++++++++++++------- > hw/vfio/pci-quirks.c | 24 +++--- > hw/vfio/pci.c | 168 +++++++++++++++++++++------------------- > hw/vfio/platform.c | 72 +++-------------- > include/hw/vfio/vfio-common.h | 23 ++++- > trace-events | 10 ++ > 7 files changed, 283 insertions(+), 190 deletions(-) > > diff --git a/hw/arm/sysbus-fdt.c b/hw/arm/sysbus-fdt.c > index 04afeae..49bd212 100644 > --- a/hw/arm/sysbus-fdt.c > +++ b/hw/arm/sysbus-fdt.c > @@ -240,7 +240,7 @@ static int add_calxeda_midway_xgmac_fdt_node(SysBusDevice > *sbdev, void *opaque) > mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, i); > reg_attr[2 * i] = cpu_to_be32(mmio_base); > reg_attr[2 * i + 1] = cpu_to_be32( > - memory_region_size(&vdev->regions[i]->mem)); > + memory_region_size(vdev->regions[i]->mem)); > } > qemu_fdt_setprop(fdt, nodename, "reg", reg_attr, > vbasedev->num_regions * 2 * sizeof(uint32_t)); > @@ -374,7 +374,7 @@ static int add_amd_xgbe_fdt_node(SysBusDevice *sbdev, > void *opaque) > mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, i); > reg_attr[2 * i] = cpu_to_be32(mmio_base); > reg_attr[2 * i + 1] = cpu_to_be32( > - memory_region_size(&vdev->regions[i]->mem)); > + memory_region_size(vdev->regions[i]->mem)); > } > qemu_fdt_setprop(guest_fdt, nodename, "reg", reg_attr, > vbasedev->num_regions * 2 * sizeof(uint32_t)); > diff --git a/hw/vfio/common.c b/hw/vfio/common.c > index e20fc4f..96ccb79 100644 > --- a/hw/vfio/common.c > +++ b/hw/vfio/common.c > @@ -493,46 +493,162 @@ static void vfio_listener_release(VFIOContainer > *container) > memory_listener_unregister(&container->listener); > } > > -int vfio_mmap_region(Object *obj, VFIORegion *region, > - MemoryRegion *mem, MemoryRegion *submem, > - void **map, size_t size, off_t offset, > - const char *name) > +int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, > + int index, const char *name) > { > - int ret = 0; > - VFIODevice *vbasedev = region->vbasedev; > + struct vfio_region_info *info; > + int ret; > + > + ret = vfio_get_region_info(vbasedev, index, &info); > + if (ret) { > + return ret; > + } > + > + region->vbasedev = vbasedev; > + region->flags = info->flags; > + region->size = info->size; > + region->fd_offset = info->offset; > + region->nr = index; > > - if (!vbasedev->no_mmap && size && region->flags & > - VFIO_REGION_INFO_FLAG_MMAP) { > - int prot = 0; > + if (region->size) { > + region->mem = g_new0(MemoryRegion, 1); > + memory_region_init_io(region->mem, obj, &vfio_region_ops, > + region, name, region->size); > > - if (region->flags & VFIO_REGION_INFO_FLAG_READ) { > - prot |= PROT_READ; > + if (!vbasedev->no_mmap && > + region->flags & VFIO_REGION_INFO_FLAG_MMAP && > + !(region->size & ~qemu_real_host_page_mask)) { > + > + region->nr_mmaps = 1; > + region->mmaps = g_new0(VFIOMmap, region->nr_mmaps); > + > + region->mmaps[0].offset = 0; > + region->mmaps[0].size = region->size; > } > + } > + > + g_free(info); > + > + trace_vfio_region_setup(vbasedev->name, index, name, > + region->flags, region->fd_offset, region->size); > + return 0; > +} > > - if (region->flags & VFIO_REGION_INFO_FLAG_WRITE) { > - prot |= PROT_WRITE; > +int vfio_region_mmap(VFIORegion *region) > +{ > + int i, prot = 0; > + char *name; > + > + if (!region->mem) { > + return 0; > + } > + > + prot |= region->flags & VFIO_REGION_INFO_FLAG_READ ? PROT_READ : 0; > + prot |= region->flags & VFIO_REGION_INFO_FLAG_WRITE ? PROT_WRITE : 0; > + > + for (i = 0; i < region->nr_mmaps; i++) { > + region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot, > + MAP_SHARED, region->vbasedev->fd, > + region->fd_offset + > + region->mmaps[i].offset); > + if (region->mmaps[i].mmap == MAP_FAILED) { > + int ret = -errno; > + > + trace_vfio_region_mmap_fault(memory_region_name(region->mem), i, > + region->fd_offset + > + region->mmaps[i].offset, > + region->fd_offset + > + region->mmaps[i].offset + > + region->mmaps[i].size - 1, ret); > + > + region->mmaps[i].mmap = NULL; > + > + for (i--; i >= 0; i--) { > + memory_region_del_subregion(region->mem, > ®ion->mmaps[i].mem); > + munmap(region->mmaps[i].mmap, region->mmaps[i].size); > + object_unparent(OBJECT(®ion->mmaps[i].mem)); > + region->mmaps[i].mmap = NULL; > + } > + > + return ret; > } > > - *map = mmap(NULL, size, prot, MAP_SHARED, > - vbasedev->fd, > - region->fd_offset + offset); > - if (*map == MAP_FAILED) { > - *map = NULL; > - ret = -errno; > - goto empty_region; > + name = g_strdup_printf("%s mmaps[%d]", > + memory_region_name(region->mem), i); > + memory_region_init_ram_ptr(®ion->mmaps[i].mem, > + memory_region_owner(region->mem), > + name, region->mmaps[i].size, > + region->mmaps[i].mmap); > + g_free(name); > + memory_region_set_skip_dump(®ion->mmaps[i].mem); > + memory_region_add_subregion(region->mem, region->mmaps[i].offset, > + ®ion->mmaps[i].mem); > + > + trace_vfio_region_mmap(memory_region_name(®ion->mmaps[i].mem), > + region->mmaps[i].offset, > + region->mmaps[i].offset + > + region->mmaps[i].size - 1); > + } > + > + return 0; > +} > + > +void vfio_region_exit(VFIORegion *region) > +{ > + int i; > + > + if (!region->mem) { > + return; > + } > + > + for (i = 0; i < region->nr_mmaps; i++) { > + if (region->mmaps[i].mmap) { > + memory_region_del_subregion(region->mem, ®ion->mmaps[i].mem); > } > + } > > - memory_region_init_ram_ptr(submem, obj, name, size, *map); > - memory_region_set_skip_dump(submem); > - } else { > -empty_region: > - /* Create a zero sized sub-region to make cleanup easy. */ > - memory_region_init(submem, obj, name, 0); > + trace_vfio_region_exit(region->vbasedev->name, region->nr); > +} > + > +void vfio_region_finalize(VFIORegion *region) > +{ > + int i; > + > + if (!region->mem) { > + return; > } > > - memory_region_add_subregion(mem, offset, submem); > + for (i = 0; i < region->nr_mmaps; i++) { > + if (region->mmaps[i].mmap) { > + munmap(region->mmaps[i].mmap, region->mmaps[i].size); > + object_unparent(OBJECT(®ion->mmaps[i].mem)); > + } > + } > > - return ret; > + object_unparent(OBJECT(region->mem)); > + > + g_free(region->mem); > + g_free(region->mmaps); > + > + trace_vfio_region_finalize(region->vbasedev->name, region->nr); > +} > + > +void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled) > +{ > + int i; > + > + if (!region->mem) { > + return; > + } > + > + for (i = 0; i < region->nr_mmaps; i++) { > + if (region->mmaps[i].mmap) { > + memory_region_set_enabled(®ion->mmaps[i].mem, enabled); > + } > + } > + > + trace_vfio_region_mmaps_set_enabled(memory_region_name(region->mem), > + enabled); > } > > void vfio_reset_handler(void *opaque) > diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c > index 4815527..d626ec9 100644 > --- a/hw/vfio/pci-quirks.c > +++ b/hw/vfio/pci-quirks.c > @@ -337,14 +337,14 @@ static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice > *vdev, int nr) > memory_region_init_io(window->addr_mem, OBJECT(vdev), > &vfio_generic_window_address_quirk, window, > "vfio-ati-bar4-window-address-quirk", 4); > - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, > + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, > window->address_offset, > window->addr_mem, 1); > > memory_region_init_io(window->data_mem, OBJECT(vdev), > &vfio_generic_window_data_quirk, window, > "vfio-ati-bar4-window-data-quirk", 4); > - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, > + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, > window->data_offset, > window->data_mem, 1); > > @@ -378,7 +378,7 @@ static void vfio_probe_ati_bar2_quirk(VFIOPCIDevice > *vdev, int nr) > memory_region_init_io(mirror->mem, OBJECT(vdev), > &vfio_generic_mirror_quirk, mirror, > "vfio-ati-bar2-4000-quirk", PCI_CONFIG_SPACE_SIZE); > - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, > + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, > mirror->offset, mirror->mem, 1); > > QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); > @@ -683,7 +683,7 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice > *vdev, int nr) > memory_region_init_io(window->addr_mem, OBJECT(vdev), > &vfio_generic_window_address_quirk, window, > "vfio-nvidia-bar5-window-address-quirk", 4); > - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, > + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, > window->address_offset, > window->addr_mem, 1); > memory_region_set_enabled(window->addr_mem, false); > @@ -691,7 +691,7 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice > *vdev, int nr) > memory_region_init_io(window->data_mem, OBJECT(vdev), > &vfio_generic_window_data_quirk, window, > "vfio-nvidia-bar5-window-data-quirk", 4); > - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, > + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, > window->data_offset, > window->data_mem, 1); > memory_region_set_enabled(window->data_mem, false); > @@ -699,13 +699,13 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice > *vdev, int nr) > memory_region_init_io(&quirk->mem[2], OBJECT(vdev), > &vfio_nvidia_bar5_quirk_master, bar5, > "vfio-nvidia-bar5-master-quirk", 4); > - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, > + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, > 0, &quirk->mem[2], 1); > > memory_region_init_io(&quirk->mem[3], OBJECT(vdev), > &vfio_nvidia_bar5_quirk_enable, bar5, > "vfio-nvidia-bar5-enable-quirk", 4); > - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, > + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, > 4, &quirk->mem[3], 1); > > QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); > @@ -767,7 +767,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice > *vdev, int nr) > &vfio_nvidia_mirror_quirk, mirror, > "vfio-nvidia-bar0-88000-mirror-quirk", > vdev->config_size); > - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, > + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, > mirror->offset, mirror->mem, 1); > > QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); > @@ -786,7 +786,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice > *vdev, int nr) > &vfio_nvidia_mirror_quirk, mirror, > "vfio-nvidia-bar0-1800-mirror-quirk", > PCI_CONFIG_SPACE_SIZE); > - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, > + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, > mirror->offset, mirror->mem, 1); > > QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); > @@ -947,13 +947,13 @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice > *vdev, int nr) > memory_region_init_io(&quirk->mem[0], OBJECT(vdev), > &vfio_rtl_address_quirk, rtl, > "vfio-rtl8168-window-address-quirk", 4); > - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, > + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, > 0x74, &quirk->mem[0], 1); > > memory_region_init_io(&quirk->mem[1], OBJECT(vdev), > &vfio_rtl_data_quirk, rtl, > "vfio-rtl8168-window-data-quirk", 4); > - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, > + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, > 0x70, &quirk->mem[1], 1); > > QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); > @@ -1020,7 +1020,7 @@ void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int > nr) > > QLIST_FOREACH(quirk, &bar->quirks, next) { > for (i = 0; i < quirk->nr_mem; i++) { > - memory_region_del_subregion(&bar->region.mem, &quirk->mem[i]); > + memory_region_del_subregion(bar->region.mem, &quirk->mem[i]); > } > } > } > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > index db7a950..f18a678 100644 > --- a/hw/vfio/pci.c > +++ b/hw/vfio/pci.c > @@ -1166,6 +1166,74 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos) > return 0; > } > > +static void vfio_pci_fixup_msix_region(VFIOPCIDevice *vdev) > +{ > + off_t start, end; > + VFIORegion *region = &vdev->bars[vdev->msix->table_bar].region; > + > + /* > + * We expect to find a single mmap covering the whole BAR, anything else > + * means it's either unsupported or already setup. > + */ > + if (region->nr_mmaps != 1 || region->mmaps[0].offset || > + region->size != region->mmaps[0].size) { > + return; > + } > + > + /* MSI-X table start and end aligned to host page size */ > + start = vdev->msix->table_offset & qemu_real_host_page_mask; > + end = REAL_HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset + > + (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE)); > + > + /* > + * Does the MSI-X table cover the beginning of the BAR? The whole BAR? > + * NB - Host page size is necessarily a power of two and so is the PCI > + * BAR (not counting EA yet), therefore if we have host page aligned > + * @start and @end, then any remainder of the BAR before or after those > + * must be at least host page sized and therefore mmap'able. > + */ > + if (!start) { > + if (end >= region->size) { > + region->nr_mmaps = 0; > + g_free(region->mmaps); > + region->mmaps = NULL; > + trace_vfio_msix_fixup(vdev->vbasedev.name, > + vdev->msix->table_bar, 0, 0); > + } else { > + region->mmaps[0].offset = end; > + region->mmaps[0].size = region->size - end; > + trace_vfio_msix_fixup(vdev->vbasedev.name, > + vdev->msix->table_bar, region->mmaps[0].offset, > + region->mmaps[0].offset + > region->mmaps[0].size); Sorry this does not compile for me on arm 32b:
./trace/generated-tracers.h:16113:23: error: format ‘%lx’ expects argument of type ‘long unsigned int’, but argument 8 has type ‘off_t’ [-Werror=format=] , name, bar, offset, size); -> vfio_msix_fixup(const char *name, int bar, off_t start, off_t end) " (%s) MSI-X region %d mmap fixup [0x%"PRIx64" - 0x%"PRIx64"]" ? Best Regards Eric > + } > + > + /* Maybe it's aligned at the end of the BAR */ > + } else if (end >= region->size) { > + region->mmaps[0].size = start; > + trace_vfio_msix_fixup(vdev->vbasedev.name, > + vdev->msix->table_bar, region->mmaps[0].offset, > + region->mmaps[0].offset + > region->mmaps[0].size); > + > + /* Otherwise it must split the BAR */ > + } else { > + region->nr_mmaps = 2; > + region->mmaps = g_renew(VFIOMmap, region->mmaps, 2); > + > + memcpy(®ion->mmaps[1], ®ion->mmaps[0], sizeof(VFIOMmap)); > + > + region->mmaps[0].size = start; > + trace_vfio_msix_fixup(vdev->vbasedev.name, > + vdev->msix->table_bar, region->mmaps[0].offset, > + region->mmaps[0].offset + > region->mmaps[0].size); > + > + region->mmaps[1].offset = end; > + region->mmaps[1].size = region->size - end; > + trace_vfio_msix_fixup(vdev->vbasedev.name, > + vdev->msix->table_bar, region->mmaps[1].offset, > + region->mmaps[1].offset + > region->mmaps[1].size); > + } > +} > + > /* > * We don't have any control over how pci_add_capability() inserts > * capabilities into the chain. In order to setup MSI-X we need a > @@ -1240,6 +1308,8 @@ static int vfio_msix_early_setup(VFIOPCIDevice *vdev) > msix->table_offset, msix->entries); > vdev->msix = msix; > > + vfio_pci_fixup_msix_region(vdev); > + > return 0; > } > > @@ -1250,9 +1320,9 @@ static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos) > vdev->msix->pending = g_malloc0(BITS_TO_LONGS(vdev->msix->entries) * > sizeof(unsigned long)); > ret = msix_init(&vdev->pdev, vdev->msix->entries, > - &vdev->bars[vdev->msix->table_bar].region.mem, > + vdev->bars[vdev->msix->table_bar].region.mem, > vdev->msix->table_bar, vdev->msix->table_offset, > - &vdev->bars[vdev->msix->pba_bar].region.mem, > + vdev->bars[vdev->msix->pba_bar].region.mem, > vdev->msix->pba_bar, vdev->msix->pba_offset, pos); > if (ret < 0) { > if (ret == -ENOTSUP) { > @@ -1289,8 +1359,8 @@ static void vfio_teardown_msi(VFIOPCIDevice *vdev) > > if (vdev->msix) { > msix_uninit(&vdev->pdev, > - &vdev->bars[vdev->msix->table_bar].region.mem, > - &vdev->bars[vdev->msix->pba_bar].region.mem); > + vdev->bars[vdev->msix->table_bar].region.mem, > + vdev->bars[vdev->msix->pba_bar].region.mem); > g_free(vdev->msix->pending); > } > } > @@ -1303,16 +1373,7 @@ static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, > bool enabled) > int i; > > for (i = 0; i < PCI_ROM_SLOT; i++) { > - VFIOBAR *bar = &vdev->bars[i]; > - > - if (!bar->region.size) { > - continue; > - } > - > - memory_region_set_enabled(&bar->region.mmap_mem, enabled); > - if (vdev->msix && vdev->msix->table_bar == i) { > - memory_region_set_enabled(&vdev->msix->mmap_mem, enabled); > - } > + vfio_region_mmaps_set_enabled(&vdev->bars[i].region, enabled); > } > } > > @@ -1326,11 +1387,7 @@ static void vfio_unregister_bar(VFIOPCIDevice *vdev, > int nr) > > vfio_bar_quirk_teardown(vdev, nr); > > - memory_region_del_subregion(&bar->region.mem, &bar->region.mmap_mem); > - > - if (vdev->msix && vdev->msix->table_bar == nr) { > - memory_region_del_subregion(&bar->region.mem, &vdev->msix->mmap_mem); > - } > + vfio_region_exit(&bar->region); > } > > static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr) > @@ -1343,18 +1400,13 @@ static void vfio_unmap_bar(VFIOPCIDevice *vdev, int > nr) > > vfio_bar_quirk_free(vdev, nr); > > - munmap(bar->region.mmap, memory_region_size(&bar->region.mmap_mem)); > - > - if (vdev->msix && vdev->msix->table_bar == nr) { > - munmap(vdev->msix->mmap, memory_region_size(&vdev->msix->mmap_mem)); > - } > + vfio_region_finalize(&bar->region); > } > > static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) > { > VFIOBAR *bar = &vdev->bars[nr]; > uint64_t size = bar->region.size; > - char name[64]; > uint32_t pci_bar; > uint8_t type; > int ret; > @@ -1364,8 +1416,6 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) > return; > } > > - snprintf(name, sizeof(name), "VFIO %s BAR %d", vdev->vbasedev.name, nr); > - > /* Determine what type of BAR this is for registration */ > ret = pread(vdev->vbasedev.fd, &pci_bar, sizeof(pci_bar), > vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr)); > @@ -1380,41 +1430,11 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) > type = pci_bar & (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK : > ~PCI_BASE_ADDRESS_MEM_MASK); > > - /* A "slow" read/write mapping underlies all BARs */ > - memory_region_init_io(&bar->region.mem, OBJECT(vdev), &vfio_region_ops, > - bar, name, size); > - pci_register_bar(&vdev->pdev, nr, type, &bar->region.mem); > - > - /* > - * We can't mmap areas overlapping the MSIX vector table, so we > - * potentially insert a direct-mapped subregion before and after it. > - */ > - if (vdev->msix && vdev->msix->table_bar == nr) { > - size = vdev->msix->table_offset & qemu_real_host_page_mask; > - } > - > - strncat(name, " mmap", sizeof(name) - strlen(name) - 1); > - if (vfio_mmap_region(OBJECT(vdev), &bar->region, &bar->region.mem, > - &bar->region.mmap_mem, &bar->region.mmap, > - size, 0, name)) { > - error_report("%s unsupported. Performance may be slow", name); > - } > - > - if (vdev->msix && vdev->msix->table_bar == nr) { > - uint64_t start; > - > - start = REAL_HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset + > - (vdev->msix->entries * > - PCI_MSIX_ENTRY_SIZE)); > + pci_register_bar(&vdev->pdev, nr, type, bar->region.mem); > > - size = start < bar->region.size ? bar->region.size - start : 0; > - strncat(name, " msix-hi", sizeof(name) - strlen(name) - 1); > - /* VFIOMSIXInfo contains another MemoryRegion for this mapping */ > - if (vfio_mmap_region(OBJECT(vdev), &bar->region, &bar->region.mem, > - &vdev->msix->mmap_mem, > - &vdev->msix->mmap, size, start, name)) { > - error_report("%s unsupported. Performance may be slow", name); > - } > + if (vfio_region_mmap(&bar->region)) { > + error_report("Failed to mmap %s BAR %d. Performance may be slow", > + vdev->vbasedev.name, nr); > } > > vfio_bar_quirk_setup(vdev, nr); > @@ -2049,25 +2069,18 @@ static int vfio_populate_device(VFIOPCIDevice *vdev) > } > > for (i = VFIO_PCI_BAR0_REGION_INDEX; i < VFIO_PCI_ROM_REGION_INDEX; i++) > { > - ret = vfio_get_region_info(vbasedev, i, ®_info); > + char *name = g_strdup_printf("%s BAR %d", vbasedev->name, i); > + > + ret = vfio_region_setup(OBJECT(vdev), vbasedev, > + &vdev->bars[i].region, i, name); > + g_free(name); > + > if (ret) { > error_report("vfio: Error getting region %d info: %m", i); > goto error; > } > > - trace_vfio_populate_device_region(vbasedev->name, i, > - (unsigned long)reg_info->size, > - (unsigned long)reg_info->offset, > - (unsigned long)reg_info->flags); > - > - vdev->bars[i].region.vbasedev = vbasedev; > - vdev->bars[i].region.flags = reg_info->flags; > - vdev->bars[i].region.size = reg_info->size; > - vdev->bars[i].region.fd_offset = reg_info->offset; > - vdev->bars[i].region.nr = i; > QLIST_INIT(&vdev->bars[i].quirks); > - > - g_free(reg_info); > } > > ret = vfio_get_region_info(vbasedev, > @@ -2153,11 +2166,8 @@ error: > static void vfio_put_device(VFIOPCIDevice *vdev) > { > g_free(vdev->vbasedev.name); > - if (vdev->msix) { > - object_unparent(OBJECT(&vdev->msix->mmap_mem)); > - g_free(vdev->msix); > - vdev->msix = NULL; > - } > + g_free(vdev->msix); > + > vfio_put_base_device(&vdev->vbasedev); > } > > diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c > index f9b9c20..a2ab75d 100644 > --- a/hw/vfio/platform.c > +++ b/hw/vfio/platform.c > @@ -143,12 +143,8 @@ static void vfio_mmap_set_enabled(VFIOPlatformDevice > *vdev, bool enabled) > { > int i; > > - trace_vfio_platform_mmap_set_enabled(enabled); > - > for (i = 0; i < vdev->vbasedev.num_regions; i++) { > - VFIORegion *region = vdev->regions[i]; > - > - memory_region_set_enabled(®ion->mmap_mem, enabled); > + vfio_region_mmaps_set_enabled(vdev->regions[i], enabled); > } > } > > @@ -476,29 +472,16 @@ static int vfio_populate_device(VFIODevice *vbasedev) > vdev->regions = g_new0(VFIORegion *, vbasedev->num_regions); > > for (i = 0; i < vbasedev->num_regions; i++) { > - struct vfio_region_info *reg_info; > - VFIORegion *ptr; > + char *name = g_strdup_printf("VFIO %s region %d\n", vbasedev->name, > i); > > vdev->regions[i] = g_new0(VFIORegion, 1); > - ptr = vdev->regions[i]; > - ret = vfio_get_region_info(vbasedev, i, ®_info); > + ret = vfio_region_setup(OBJECT(vdev), vbasedev, > + vdev->regions[i], i, name); > + g_free(name); > if (ret) { > error_report("vfio: Error getting region %d info: %m", i); > goto reg_error; > } > - ptr->flags = reg_info->flags; > - ptr->size = reg_info->size; > - ptr->fd_offset = reg_info->offset; > - ptr->nr = i; > - ptr->vbasedev = vbasedev; > - > - g_free(reg_info); > - > - trace_vfio_platform_populate_regions(ptr->nr, > - (unsigned long)ptr->flags, > - (unsigned long)ptr->size, > - ptr->vbasedev->fd, > - (unsigned long)ptr->fd_offset); > } > > vdev->mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, > @@ -535,6 +518,9 @@ irq_err: > } > reg_error: > for (i = 0; i < vbasedev->num_regions; i++) { > + if (vdev->regions[i]) { > + vfio_region_finalize(vdev->regions[i]); > + } > g_free(vdev->regions[i]); > } > g_free(vdev->regions); > @@ -636,41 +622,6 @@ static int vfio_base_device_init(VFIODevice *vbasedev) > } > > /** > - * vfio_map_region - initialize the 2 memory regions for a given > - * MMIO region index > - * @vdev: the VFIO platform device handle > - * @nr: the index of the region > - * > - * Init the top memory region and the mmapped memory region beneath > - * VFIOPlatformDevice is used since VFIODevice is not a QOM Object > - * and could not be passed to memory region functions > -*/ > -static void vfio_map_region(VFIOPlatformDevice *vdev, int nr) > -{ > - VFIORegion *region = vdev->regions[nr]; > - uint64_t size = region->size; > - char name[64]; > - > - if (!size) { > - return; > - } > - > - g_snprintf(name, sizeof(name), "VFIO %s region %d", > - vdev->vbasedev.name, nr); > - > - /* A "slow" read/write mapping underlies all regions */ > - memory_region_init_io(®ion->mem, OBJECT(vdev), &vfio_region_ops, > - region, name, size); > - > - g_strlcat(name, " mmap", sizeof(name)); > - > - if (vfio_mmap_region(OBJECT(vdev), region, ®ion->mem, > - ®ion->mmap_mem, ®ion->mmap, size, 0, name)) { > - error_report("%s unsupported. Performance may be slow", name); > - } > -} > - > -/** > * vfio_platform_realize - the device realize function > * @dev: device state pointer > * @errp: error > @@ -700,8 +651,11 @@ static void vfio_platform_realize(DeviceState *dev, > Error **errp) > } > > for (i = 0; i < vbasedev->num_regions; i++) { > - vfio_map_region(vdev, i); > - sysbus_init_mmio(sbdev, &vdev->regions[i]->mem); > + if (vfio_region_mmap(vdev->regions[i])) { > + error_report("%s mmap unsupported. Performance may be slow", > + memory_region_name(vdev->regions[i]->mem)); > + } > + sysbus_init_mmio(sbdev, vdev->regions[i]->mem); > } > } > > diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > index 371383c..594905a 100644 > --- a/include/hw/vfio/vfio-common.h > +++ b/include/hw/vfio/vfio-common.h > @@ -41,14 +41,21 @@ enum { > VFIO_DEVICE_TYPE_PLATFORM = 1, > }; > > +typedef struct VFIOMmap { > + MemoryRegion mem; > + void *mmap; > + off_t offset; > + size_t size; > +} VFIOMmap; > + > typedef struct VFIORegion { > struct VFIODevice *vbasedev; > off_t fd_offset; /* offset of region within device fd */ > - MemoryRegion mem; /* slow, read/write access */ > - MemoryRegion mmap_mem; /* direct mapped access */ > - void *mmap; > + MemoryRegion *mem; /* slow, read/write access */ > size_t size; > uint32_t flags; /* VFIO region flags (rd/wr/mmap) */ > + uint32_t nr_mmaps; > + VFIOMmap *mmaps; > uint8_t nr; /* cache the region number for debug */ > } VFIORegion; > > @@ -126,10 +133,12 @@ void vfio_region_write(void *opaque, hwaddr addr, > uint64_t data, unsigned size); > uint64_t vfio_region_read(void *opaque, > hwaddr addr, unsigned size); > -int vfio_mmap_region(Object *vdev, VFIORegion *region, > - MemoryRegion *mem, MemoryRegion *submem, > - void **map, size_t size, off_t offset, > - const char *name); > +int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, > + int index, const char *name); > +int vfio_region_mmap(VFIORegion *region); > +void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled); > +void vfio_region_exit(VFIORegion *region); > +void vfio_region_finalize(VFIORegion *region); > void vfio_reset_handler(void *opaque); > VFIOGroup *vfio_get_group(int groupid, AddressSpace *as); > void vfio_put_group(VFIOGroup *group); > diff --git a/trace-events b/trace-events > index 6fba6cc..e36bc07 100644 > --- a/trace-events > +++ b/trace-events > @@ -1652,6 +1652,7 @@ vfio_msix_enable(const char *name) " (%s)" > vfio_msix_pba_disable(const char *name) " (%s)" > vfio_msix_pba_enable(const char *name) " (%s)" > vfio_msix_disable(const char *name) " (%s)" > +vfio_msix_fixup(const char *name, int bar, off_t offset, size_t size) " (%s) > MSI-X region %d mmap fixup [0x%lx - 0x%lx]" > vfio_msi_enable(const char *name, int nr_vectors) " (%s) Enabled %d MSI > vectors" > vfio_msi_disable(const char *name) " (%s)" > vfio_pci_load_rom(const char *name, unsigned long size, unsigned long > offset, unsigned long flags) "Device %s ROM:\n size: 0x%lx, offset: 0x%lx, > flags: 0x%lx" > @@ -1670,7 +1671,6 @@ vfio_pci_hot_reset(const char *name, const char *type) > " (%s) %s" > vfio_pci_hot_reset_has_dep_devices(const char *name) "%s: hot reset > dependent devices:" > vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, > int group_id) "\t%04x:%02x:%02x.%x group %d" > vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot > reset: %s" > -vfio_populate_device_region(const char *region_name, int index, unsigned > long size, unsigned long offset, unsigned long flags) "Device %s region %d:\n > size: 0x%lx, offset: 0x%lx, flags: 0x%lx" > vfio_populate_device_config(const char *name, unsigned long size, unsigned > long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: > 0x%lx, flags: 0x%lx" > vfio_populate_device_get_irq_info_failure(void) "VFIO_DEVICE_GET_IRQ_INFO > failure: %m" > vfio_initfn(const char *name, int group_id) " (%s) group %d" > @@ -1726,13 +1726,17 @@ vfio_disconnect_container(int fd) "close > container->fd=%d" > vfio_put_group(int fd) "close group->fd=%d" > vfio_get_device(const char * name, unsigned int flags, unsigned int > num_regions, unsigned int num_irqs) "Device %s flags: %u, regions: %u, irqs: > %u" > vfio_put_base_device(int fd) "close vdev->fd=%d" > +vfio_region_setup(const char *dev, int index, const char *name, unsigned > long flags, unsigned long offset, unsigned long size) "Device %s, region %d > \"%s\", flags: %lx, offset: %lx, size: %lx" > +vfio_region_mmap_fault(const char *name, int index, unsigned long offset, > unsigned long size, int fault) "Region %s mmaps[%d], [%lx - %lx], fault: %d" > +vfio_region_mmap(const char *name, unsigned long offset, unsigned long end) > "Region %s [%lx - %lx]" > +vfio_region_exit(const char *name, int index) "Device %s, region %d" > +vfio_region_finalize(const char *name, int index) "Device %s, region %d" > +vfio_region_mmaps_set_enabled(const char *name, bool enabled) "Region %s > mmaps enabled: %d" > > # hw/vfio/platform.c > -vfio_platform_populate_regions(int region_index, unsigned long flag, > unsigned long size, int fd, unsigned long offset) "- region %d flags = 0x%lx, > size = 0x%lx, fd= %d, offset = 0x%lx" > vfio_platform_base_device_init(char *name, int groupid) "%s belongs to group > #%d" > vfio_platform_realize(char *name, char *compat) "vfio device %s, compat = %s" > vfio_platform_eoi(int pin, int fd) "EOI IRQ pin %d (fd=%d)" > -vfio_platform_mmap_set_enabled(bool enabled) "fast path = %d" > vfio_platform_intp_mmap_enable(int pin) "IRQ #%d still active, stay in slow > path" > vfio_platform_intp_interrupt(int pin, int fd) "Inject IRQ #%d (fd = %d)" > vfio_platform_intp_inject_pending_lockheld(int pin, int fd) "Inject pending > IRQ #%d (fd = %d)" > >