Re: [RFC v3 04/19] Add device IO ops vector
> On Nov 19, 2021, at 2:42 PM, Alex Williamson > wrote: > > On Mon, 8 Nov 2021 16:46:32 -0800 > John Johnson wrote: > >> >> +int (*region_write)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t >> size, >> +void *data, bool post); > > The @post arg is not really developed in this patch, it would make > review easier to add the arg when it becomes implemented and used. > ok >> >> @@ -2382,6 +2398,21 @@ int vfio_get_region_info(VFIODevice *vbasedev, int >> index, >> struct vfio_region_info **info) >> { >> size_t argsz = sizeof(struct vfio_region_info); >> +int fd = -1; >> +int ret; >> + >> +/* create region cache */ >> +if (vbasedev->regions == NULL) { >> +vbasedev->regions = g_new0(struct vfio_region_info *, >> + vbasedev->num_regions); >> +} > > Seems like this should have been part of vfio_get_device() since that's > where we set num_regions. > >> +/* check cache */ >> +if (vbasedev->regions[index] != NULL) { >> +*info = g_malloc0(vbasedev->regions[index]->argsz); >> +memcpy(*info, vbasedev->regions[index], >> + vbasedev->regions[index]->argsz); >> +return 0; >> +} > > Why are we calling vfio_get_region_info() enough that we need a cache? > This looks like an optimization for something that doesn't exist yet. > And if we have a cache, why aren't callers using it directly rather > than creating a copy for each caller? > The are a couple types of vfio_get_region_info() callers. One type wants a specific region, which it then uses to fill in it’s own data structures (e.g., vfio_region_setup()) and the other type uses vfio_get_dev_region_info() to loop through all regions looking for a specific one (e.g., for the migration region or for a specifc device to setup its quirks) On top of that, each vfio_get_region_info() will need to call the server twice for regions with capabilities: once to get the proper ‘argsz’ and once to actually get the info. I thought just having a cache that gets filled once, then satisfies all later callers was a cleaner idea. It also solved the issue I had that the FD returned isn't used until the container is setup, which would’ve needed another vfio_get_region_info() call. I can certainly move the cache fill to vfio_get_device() and remove the copies if you like that idea better. >> >> *info = g_malloc0(argsz); >> >> @@ -2389,19 +2420,28 @@ int vfio_get_region_info(VFIODevice *vbasedev, int >> index, >> retry: >> (*info)->argsz = argsz; >> >> -if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) { >> +ret = VDEV_GET_REGION_INFO(vbasedev, *info, ); >> +if (ret != 0) { >> g_free(*info); >> *info = NULL; >> -return -errno; >> +return ret; >> } >> >> if ((*info)->argsz > argsz) { >> argsz = (*info)->argsz; >> *info = g_realloc(*info, argsz); >> +if (fd != -1) { >> +close(fd); >> +fd = -1; >> +} > > Use of fd here is hard to review, it's clearly for some future use case. > The FD code can be moved to a patch where it’s used. >> >> + >> +static int vfio_io_region_write(VFIODevice *vbasedev, uint8_t index, off_t >> off, >> +uint32_t size, void *data, bool post) >> +{ >> +struct vfio_region_info *info = vbasedev->regions[index]; >> +int ret; >> + >> +ret = pwrite(vbasedev->fd, data, size, info->offset + off); >> +if (ret < 0) { >> +ret = -errno; >> +} >> + >> +return ret; >> +} > > Simplify all these with ternaries? > >return ret < 0 ? -errno : ret; > ok >> >> static void vfio_pci_size_rom(VFIOPCIDevice *vdev) >> { >> +VFIODevice *vbasedev = >vbasedev; >> uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK); >> -off_t offset = vdev->config_offset + PCI_ROM_ADDRESS; >> DeviceState *dev = DEVICE(vdev); >> char *name; >> -int fd = vdev->vbasedev.fd; >> >> if (vdev->pdev.romfile || !vdev->pdev.rom_bar) { >> /* Since pci handles romfile, just print a message and return */ >> @@ -926,13 +928,23 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) >> * Use the same size ROM BAR as the physical device. The contents >> * will get filled in later when the guest tries to read it. >> */ >> -if (pread(fd, , 4, offset) != 4 || >> -pwrite(fd, , 4, offset) != 4 || >> -pread(fd, , 4, offset) != 4 || >> -pwrite(fd, , 4, offset) != 4) { >> -error_report("%s(%s) failed: %m", __func__, vdev->vbasedev.name); >> +#define rom_read(var) VDEV_REGION_READ(vbasedev, \ >> + VFIO_PCI_CONFIG_REGION_INDEX, \ >> + PCI_ROM_ADDRESS, 4, (var)) >> +#define rom_write(var) VDEV_REGION_WRITE(vbasedev,
Re: [RFC v3 04/19] Add device IO ops vector
On Mon, 8 Nov 2021 16:46:32 -0800 John Johnson wrote: > Used for communication with VFIO driver > (prep work for vfio-user, which will communicate over a socket) > > Signed-off-by: John G Johnson > --- > include/hw/vfio/vfio-common.h | 28 > hw/vfio/common.c | 159 > ++ > hw/vfio/pci.c | 146 -- > 3 files changed, 265 insertions(+), 68 deletions(-) > > diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > index 9b3c5e5..43fa948 100644 > --- a/include/hw/vfio/vfio-common.h > +++ b/include/hw/vfio/vfio-common.h > @@ -124,6 +124,7 @@ typedef struct VFIOHostDMAWindow { > } VFIOHostDMAWindow; > > typedef struct VFIODeviceOps VFIODeviceOps; > +typedef struct VFIODevIO VFIODevIO; > > typedef struct VFIODevice { > QLIST_ENTRY(VFIODevice) next; > @@ -139,12 +140,14 @@ typedef struct VFIODevice { > bool ram_block_discard_allowed; > bool enable_migration; > VFIODeviceOps *ops; > +VFIODevIO *io_ops; > unsigned int num_irqs; > unsigned int num_regions; > unsigned int flags; > VFIOMigration *migration; > Error *migration_blocker; > OnOffAuto pre_copy_dirty_page_tracking; > +struct vfio_region_info **regions; > } VFIODevice; > > struct VFIODeviceOps { > @@ -164,6 +167,30 @@ struct VFIODeviceOps { > * through ioctl() to the kernel VFIO driver, but vfio-user > * can use a socket to a remote process. > */ > +struct VFIODevIO { > +int (*get_info)(VFIODevice *vdev, struct vfio_device_info *info); > +int (*get_region_info)(VFIODevice *vdev, > + struct vfio_region_info *info, int *fd); > +int (*get_irq_info)(VFIODevice *vdev, struct vfio_irq_info *irq); > +int (*set_irqs)(VFIODevice *vdev, struct vfio_irq_set *irqs); > +int (*region_read)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t > size, > + void *data); > +int (*region_write)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t > size, > +void *data, bool post); The @post arg is not really developed in this patch, it would make review easier to add the arg when it becomes implemented and used. > +}; > + > +#define VDEV_GET_INFO(vdev, info) \ > +((vdev)->io_ops->get_info((vdev), (info))) > +#define VDEV_GET_REGION_INFO(vdev, info, fd) \ > +((vdev)->io_ops->get_region_info((vdev), (info), (fd))) > +#define VDEV_GET_IRQ_INFO(vdev, irq) \ > +((vdev)->io_ops->get_irq_info((vdev), (irq))) > +#define VDEV_SET_IRQS(vdev, irqs) \ > +((vdev)->io_ops->set_irqs((vdev), (irqs))) > +#define VDEV_REGION_READ(vdev, nr, off, size, data) \ > +((vdev)->io_ops->region_read((vdev), (nr), (off), (size), (data))) > +#define VDEV_REGION_WRITE(vdev, nr, off, size, data, post) \ > +((vdev)->io_ops->region_write((vdev), (nr), (off), (size), (data), > (post))) > > struct VFIOContIO { > int (*dma_map)(VFIOContainer *container, > @@ -184,6 +211,7 @@ struct VFIOContIO { > #define CONT_DIRTY_BITMAP(cont, bitmap, range) \ > ((cont)->io_ops->dirty_bitmap((cont), (bitmap), (range))) > > +extern VFIODevIO vfio_dev_io_ioctl; > extern VFIOContIO vfio_cont_io_ioctl; > > #endif /* CONFIG_LINUX */ > diff --git a/hw/vfio/common.c b/hw/vfio/common.c > index 50748be..41fdd78 100644 > --- a/hw/vfio/common.c > +++ b/hw/vfio/common.c > @@ -70,7 +70,7 @@ void vfio_disable_irqindex(VFIODevice *vbasedev, int index) > .count = 0, > }; > > -ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, _set); > +VDEV_SET_IRQS(vbasedev, _set); > } > > void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index) > @@ -83,7 +83,7 @@ void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int > index) > .count = 1, > }; > > -ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, _set); > +VDEV_SET_IRQS(vbasedev, _set); > } > > void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index) > @@ -96,7 +96,7 @@ void vfio_mask_single_irqindex(VFIODevice *vbasedev, int > index) > .count = 1, > }; > > -ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, _set); > +VDEV_SET_IRQS(vbasedev, _set); > } > > static inline const char *action_to_str(int action) > @@ -177,9 +177,7 @@ int vfio_set_irq_signaling(VFIODevice *vbasedev, int > index, int subindex, > pfd = (int32_t *)_set->data; > *pfd = fd; > > -if (ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) { > -ret = -errno; > -} > +ret = VDEV_SET_IRQS(vbasedev, irq_set); > g_free(irq_set); > > if (!ret) { > @@ -214,6 +212,7 @@ void vfio_region_write(void *opaque, hwaddr addr, > uint32_t dword; > uint64_t qword; > } buf; > +int ret; > > switch (size) { > case 1: > @@ -233,13 +232,15 @@ void vfio_region_write(void *opaque, hwaddr addr, > break; > } > > -if (pwrite(vbasedev->fd, ,
[RFC v3 04/19] Add device IO ops vector
Used for communication with VFIO driver (prep work for vfio-user, which will communicate over a socket) Signed-off-by: John G Johnson --- include/hw/vfio/vfio-common.h | 28 hw/vfio/common.c | 159 ++ hw/vfio/pci.c | 146 -- 3 files changed, 265 insertions(+), 68 deletions(-) diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 9b3c5e5..43fa948 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -124,6 +124,7 @@ typedef struct VFIOHostDMAWindow { } VFIOHostDMAWindow; typedef struct VFIODeviceOps VFIODeviceOps; +typedef struct VFIODevIO VFIODevIO; typedef struct VFIODevice { QLIST_ENTRY(VFIODevice) next; @@ -139,12 +140,14 @@ typedef struct VFIODevice { bool ram_block_discard_allowed; bool enable_migration; VFIODeviceOps *ops; +VFIODevIO *io_ops; unsigned int num_irqs; unsigned int num_regions; unsigned int flags; VFIOMigration *migration; Error *migration_blocker; OnOffAuto pre_copy_dirty_page_tracking; +struct vfio_region_info **regions; } VFIODevice; struct VFIODeviceOps { @@ -164,6 +167,30 @@ struct VFIODeviceOps { * through ioctl() to the kernel VFIO driver, but vfio-user * can use a socket to a remote process. */ +struct VFIODevIO { +int (*get_info)(VFIODevice *vdev, struct vfio_device_info *info); +int (*get_region_info)(VFIODevice *vdev, + struct vfio_region_info *info, int *fd); +int (*get_irq_info)(VFIODevice *vdev, struct vfio_irq_info *irq); +int (*set_irqs)(VFIODevice *vdev, struct vfio_irq_set *irqs); +int (*region_read)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size, + void *data); +int (*region_write)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size, +void *data, bool post); +}; + +#define VDEV_GET_INFO(vdev, info) \ +((vdev)->io_ops->get_info((vdev), (info))) +#define VDEV_GET_REGION_INFO(vdev, info, fd) \ +((vdev)->io_ops->get_region_info((vdev), (info), (fd))) +#define VDEV_GET_IRQ_INFO(vdev, irq) \ +((vdev)->io_ops->get_irq_info((vdev), (irq))) +#define VDEV_SET_IRQS(vdev, irqs) \ +((vdev)->io_ops->set_irqs((vdev), (irqs))) +#define VDEV_REGION_READ(vdev, nr, off, size, data) \ +((vdev)->io_ops->region_read((vdev), (nr), (off), (size), (data))) +#define VDEV_REGION_WRITE(vdev, nr, off, size, data, post) \ +((vdev)->io_ops->region_write((vdev), (nr), (off), (size), (data), (post))) struct VFIOContIO { int (*dma_map)(VFIOContainer *container, @@ -184,6 +211,7 @@ struct VFIOContIO { #define CONT_DIRTY_BITMAP(cont, bitmap, range) \ ((cont)->io_ops->dirty_bitmap((cont), (bitmap), (range))) +extern VFIODevIO vfio_dev_io_ioctl; extern VFIOContIO vfio_cont_io_ioctl; #endif /* CONFIG_LINUX */ diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 50748be..41fdd78 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -70,7 +70,7 @@ void vfio_disable_irqindex(VFIODevice *vbasedev, int index) .count = 0, }; -ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, _set); +VDEV_SET_IRQS(vbasedev, _set); } void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index) @@ -83,7 +83,7 @@ void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index) .count = 1, }; -ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, _set); +VDEV_SET_IRQS(vbasedev, _set); } void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index) @@ -96,7 +96,7 @@ void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index) .count = 1, }; -ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, _set); +VDEV_SET_IRQS(vbasedev, _set); } static inline const char *action_to_str(int action) @@ -177,9 +177,7 @@ int vfio_set_irq_signaling(VFIODevice *vbasedev, int index, int subindex, pfd = (int32_t *)_set->data; *pfd = fd; -if (ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) { -ret = -errno; -} +ret = VDEV_SET_IRQS(vbasedev, irq_set); g_free(irq_set); if (!ret) { @@ -214,6 +212,7 @@ void vfio_region_write(void *opaque, hwaddr addr, uint32_t dword; uint64_t qword; } buf; +int ret; switch (size) { case 1: @@ -233,13 +232,15 @@ void vfio_region_write(void *opaque, hwaddr addr, break; } -if (pwrite(vbasedev->fd, , size, region->fd_offset + addr) != size) { +ret = VDEV_REGION_WRITE(vbasedev, region->nr, addr, size, , false); +if (ret != size) { +const char *err = ret < 0 ? strerror(-ret) : "short write"; + error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64 - ",%d) failed: %m", + ",%d) failed: %s", __func__, vbasedev->name, region->nr, - addr, data, size); +