Re: [RFC v3 04/19] Add device IO ops vector

2021-12-06 Thread John Johnson


> On Nov 19, 2021, at 2:42 PM, Alex Williamson  
> wrote:
> 
> On Mon,  8 Nov 2021 16:46:32 -0800
> John Johnson  wrote:
> 
>> 
>> +int (*region_write)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t 
>> size,
>> +void *data, bool post);
> 
> The @post arg is not really developed in this patch, it would make
> review easier to add the arg when it becomes implemented and used.
> 

ok


>> 
>> @@ -2382,6 +2398,21 @@ int vfio_get_region_info(VFIODevice *vbasedev, int 
>> index,
>>  struct vfio_region_info **info)
>> {
>> size_t argsz = sizeof(struct vfio_region_info);
>> +int fd = -1;
>> +int ret;
>> +
>> +/* create region cache */
>> +if (vbasedev->regions == NULL) {
>> +vbasedev->regions = g_new0(struct vfio_region_info *,
>> +   vbasedev->num_regions);
>> +}
> 
> Seems like this should have been part of vfio_get_device() since that's
> where we set num_regions.
> 
>> +/* check cache */
>> +if (vbasedev->regions[index] != NULL) {
>> +*info = g_malloc0(vbasedev->regions[index]->argsz);
>> +memcpy(*info, vbasedev->regions[index],
>> +   vbasedev->regions[index]->argsz);
>> +return 0;
>> +}
> 
> Why are we calling vfio_get_region_info() enough that we need a cache?
> This looks like an optimization for something that doesn't exist yet.
> And if we have a cache, why aren't callers using it directly rather
> than creating a copy for each caller?
> 

The are a couple types of vfio_get_region_info() callers.
One type wants a specific region, which it then uses to fill in
it’s own data structures (e.g., vfio_region_setup()) and the other
type uses vfio_get_dev_region_info() to loop through all regions
looking for a specific one (e.g., for the migration region or for
a specifc device to setup its quirks)  On top of that, each
vfio_get_region_info() will need to call the server twice for regions
with capabilities: once to get the proper ‘argsz’ and once to actually
get the info.

I thought just having a cache that gets filled once, then
satisfies all later callers was a cleaner idea.  It also solved the
issue I had that the FD returned isn't used until the container is
setup, which would’ve needed another vfio_get_region_info() call.

I can certainly move the cache fill to vfio_get_device()
and remove the copies if you like that idea better.



>> 
>> *info = g_malloc0(argsz);
>> 
>> @@ -2389,19 +2420,28 @@ int vfio_get_region_info(VFIODevice *vbasedev, int 
>> index,
>> retry:
>> (*info)->argsz = argsz;
>> 
>> -if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) {
>> +ret = VDEV_GET_REGION_INFO(vbasedev, *info, );
>> +if (ret != 0) {
>> g_free(*info);
>> *info = NULL;
>> -return -errno;
>> +return ret;
>> }
>> 
>> if ((*info)->argsz > argsz) {
>> argsz = (*info)->argsz;
>> *info = g_realloc(*info, argsz);
>> +if (fd != -1) {
>> +close(fd);
>> +fd = -1;
>> +}
> 
> Use of fd here is hard to review, it's clearly for some future use case.
> 

The FD code can be moved to a patch where it’s used.


>> 
>> +
>> +static int vfio_io_region_write(VFIODevice *vbasedev, uint8_t index, off_t 
>> off,
>> +uint32_t size, void *data, bool post)
>> +{
>> +struct vfio_region_info *info = vbasedev->regions[index];
>> +int ret;
>> +
>> +ret = pwrite(vbasedev->fd, data, size, info->offset + off);
>> +if (ret < 0) {
>> +ret = -errno;
>> +}
>> +
>> +return ret;
>> +}
> 
> Simplify all these with ternaries?
> 
>return ret < 0 ? -errno : ret;
> 

ok



>> 
>> static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
>> {
>> +VFIODevice *vbasedev = >vbasedev;
>> uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK);
>> -off_t offset = vdev->config_offset + PCI_ROM_ADDRESS;
>> DeviceState *dev = DEVICE(vdev);
>> char *name;
>> -int fd = vdev->vbasedev.fd;
>> 
>> if (vdev->pdev.romfile || !vdev->pdev.rom_bar) {
>> /* Since pci handles romfile, just print a message and return */
>> @@ -926,13 +928,23 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
>>  * Use the same size ROM BAR as the physical device.  The contents
>>  * will get filled in later when the guest tries to read it.
>>  */
>> -if (pread(fd, , 4, offset) != 4 ||
>> -pwrite(fd, , 4, offset) != 4 ||
>> -pread(fd, , 4, offset) != 4 ||
>> -pwrite(fd, , 4, offset) != 4) {
>> -error_report("%s(%s) failed: %m", __func__, vdev->vbasedev.name);
>> +#define rom_read(var) VDEV_REGION_READ(vbasedev,  \
>> +   VFIO_PCI_CONFIG_REGION_INDEX,  \
>> +   PCI_ROM_ADDRESS, 4, (var))
>> +#define rom_write(var) VDEV_REGION_WRITE(vbasedev, 

Re: [RFC v3 04/19] Add device IO ops vector

2021-11-19 Thread Alex Williamson
On Mon,  8 Nov 2021 16:46:32 -0800
John Johnson  wrote:

> Used for communication with VFIO driver
> (prep work for vfio-user, which will communicate over a socket)
> 
> Signed-off-by: John G Johnson 
> ---
>  include/hw/vfio/vfio-common.h |  28 
>  hw/vfio/common.c  | 159 
> ++
>  hw/vfio/pci.c | 146 --
>  3 files changed, 265 insertions(+), 68 deletions(-)
> 
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 9b3c5e5..43fa948 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -124,6 +124,7 @@ typedef struct VFIOHostDMAWindow {
>  } VFIOHostDMAWindow;
>  
>  typedef struct VFIODeviceOps VFIODeviceOps;
> +typedef struct VFIODevIO VFIODevIO;
>  
>  typedef struct VFIODevice {
>  QLIST_ENTRY(VFIODevice) next;
> @@ -139,12 +140,14 @@ typedef struct VFIODevice {
>  bool ram_block_discard_allowed;
>  bool enable_migration;
>  VFIODeviceOps *ops;
> +VFIODevIO *io_ops;
>  unsigned int num_irqs;
>  unsigned int num_regions;
>  unsigned int flags;
>  VFIOMigration *migration;
>  Error *migration_blocker;
>  OnOffAuto pre_copy_dirty_page_tracking;
> +struct vfio_region_info **regions;
>  } VFIODevice;
>  
>  struct VFIODeviceOps {
> @@ -164,6 +167,30 @@ struct VFIODeviceOps {
>   * through ioctl() to the kernel VFIO driver, but vfio-user
>   * can use a socket to a remote process.
>   */
> +struct VFIODevIO {
> +int (*get_info)(VFIODevice *vdev, struct vfio_device_info *info);
> +int (*get_region_info)(VFIODevice *vdev,
> +   struct vfio_region_info *info, int *fd);
> +int (*get_irq_info)(VFIODevice *vdev, struct vfio_irq_info *irq);
> +int (*set_irqs)(VFIODevice *vdev, struct vfio_irq_set *irqs);
> +int (*region_read)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t 
> size,
> +   void *data);
> +int (*region_write)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t 
> size,
> +void *data, bool post);

The @post arg is not really developed in this patch, it would make
review easier to add the arg when it becomes implemented and used.

> +};
> +
> +#define VDEV_GET_INFO(vdev, info) \
> +((vdev)->io_ops->get_info((vdev), (info)))
> +#define VDEV_GET_REGION_INFO(vdev, info, fd) \
> +((vdev)->io_ops->get_region_info((vdev), (info), (fd)))
> +#define VDEV_GET_IRQ_INFO(vdev, irq) \
> +((vdev)->io_ops->get_irq_info((vdev), (irq)))
> +#define VDEV_SET_IRQS(vdev, irqs) \
> +((vdev)->io_ops->set_irqs((vdev), (irqs)))
> +#define VDEV_REGION_READ(vdev, nr, off, size, data) \
> +((vdev)->io_ops->region_read((vdev), (nr), (off), (size), (data)))
> +#define VDEV_REGION_WRITE(vdev, nr, off, size, data, post) \
> +((vdev)->io_ops->region_write((vdev), (nr), (off), (size), (data), 
> (post)))
>  
>  struct VFIOContIO {
>  int (*dma_map)(VFIOContainer *container,
> @@ -184,6 +211,7 @@ struct VFIOContIO {
>  #define CONT_DIRTY_BITMAP(cont, bitmap, range) \
>  ((cont)->io_ops->dirty_bitmap((cont), (bitmap), (range)))
>  
> +extern VFIODevIO vfio_dev_io_ioctl;
>  extern VFIOContIO vfio_cont_io_ioctl;
>  
>  #endif /* CONFIG_LINUX */
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 50748be..41fdd78 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -70,7 +70,7 @@ void vfio_disable_irqindex(VFIODevice *vbasedev, int index)
>  .count = 0,
>  };
>  
> -ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, _set);
> +VDEV_SET_IRQS(vbasedev, _set);
>  }
>  
>  void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index)
> @@ -83,7 +83,7 @@ void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int 
> index)
>  .count = 1,
>  };
>  
> -ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, _set);
> +VDEV_SET_IRQS(vbasedev, _set);
>  }
>  
>  void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index)
> @@ -96,7 +96,7 @@ void vfio_mask_single_irqindex(VFIODevice *vbasedev, int 
> index)
>  .count = 1,
>  };
>  
> -ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, _set);
> +VDEV_SET_IRQS(vbasedev, _set);
>  }
>  
>  static inline const char *action_to_str(int action)
> @@ -177,9 +177,7 @@ int vfio_set_irq_signaling(VFIODevice *vbasedev, int 
> index, int subindex,
>  pfd = (int32_t *)_set->data;
>  *pfd = fd;
>  
> -if (ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) {
> -ret = -errno;
> -}
> +ret = VDEV_SET_IRQS(vbasedev, irq_set);
>  g_free(irq_set);
>  
>  if (!ret) {
> @@ -214,6 +212,7 @@ void vfio_region_write(void *opaque, hwaddr addr,
>  uint32_t dword;
>  uint64_t qword;
>  } buf;
> +int ret;
>  
>  switch (size) {
>  case 1:
> @@ -233,13 +232,15 @@ void vfio_region_write(void *opaque, hwaddr addr,
>  break;
>  }
>  
> -if (pwrite(vbasedev->fd, , 

[RFC v3 04/19] Add device IO ops vector

2021-11-08 Thread John Johnson
Used for communication with VFIO driver
(prep work for vfio-user, which will communicate over a socket)

Signed-off-by: John G Johnson 
---
 include/hw/vfio/vfio-common.h |  28 
 hw/vfio/common.c  | 159 ++
 hw/vfio/pci.c | 146 --
 3 files changed, 265 insertions(+), 68 deletions(-)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 9b3c5e5..43fa948 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -124,6 +124,7 @@ typedef struct VFIOHostDMAWindow {
 } VFIOHostDMAWindow;
 
 typedef struct VFIODeviceOps VFIODeviceOps;
+typedef struct VFIODevIO VFIODevIO;
 
 typedef struct VFIODevice {
 QLIST_ENTRY(VFIODevice) next;
@@ -139,12 +140,14 @@ typedef struct VFIODevice {
 bool ram_block_discard_allowed;
 bool enable_migration;
 VFIODeviceOps *ops;
+VFIODevIO *io_ops;
 unsigned int num_irqs;
 unsigned int num_regions;
 unsigned int flags;
 VFIOMigration *migration;
 Error *migration_blocker;
 OnOffAuto pre_copy_dirty_page_tracking;
+struct vfio_region_info **regions;
 } VFIODevice;
 
 struct VFIODeviceOps {
@@ -164,6 +167,30 @@ struct VFIODeviceOps {
  * through ioctl() to the kernel VFIO driver, but vfio-user
  * can use a socket to a remote process.
  */
+struct VFIODevIO {
+int (*get_info)(VFIODevice *vdev, struct vfio_device_info *info);
+int (*get_region_info)(VFIODevice *vdev,
+   struct vfio_region_info *info, int *fd);
+int (*get_irq_info)(VFIODevice *vdev, struct vfio_irq_info *irq);
+int (*set_irqs)(VFIODevice *vdev, struct vfio_irq_set *irqs);
+int (*region_read)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size,
+   void *data);
+int (*region_write)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size,
+void *data, bool post);
+};
+
+#define VDEV_GET_INFO(vdev, info) \
+((vdev)->io_ops->get_info((vdev), (info)))
+#define VDEV_GET_REGION_INFO(vdev, info, fd) \
+((vdev)->io_ops->get_region_info((vdev), (info), (fd)))
+#define VDEV_GET_IRQ_INFO(vdev, irq) \
+((vdev)->io_ops->get_irq_info((vdev), (irq)))
+#define VDEV_SET_IRQS(vdev, irqs) \
+((vdev)->io_ops->set_irqs((vdev), (irqs)))
+#define VDEV_REGION_READ(vdev, nr, off, size, data) \
+((vdev)->io_ops->region_read((vdev), (nr), (off), (size), (data)))
+#define VDEV_REGION_WRITE(vdev, nr, off, size, data, post) \
+((vdev)->io_ops->region_write((vdev), (nr), (off), (size), (data), (post)))
 
 struct VFIOContIO {
 int (*dma_map)(VFIOContainer *container,
@@ -184,6 +211,7 @@ struct VFIOContIO {
 #define CONT_DIRTY_BITMAP(cont, bitmap, range) \
 ((cont)->io_ops->dirty_bitmap((cont), (bitmap), (range)))
 
+extern VFIODevIO vfio_dev_io_ioctl;
 extern VFIOContIO vfio_cont_io_ioctl;
 
 #endif /* CONFIG_LINUX */
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 50748be..41fdd78 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -70,7 +70,7 @@ void vfio_disable_irqindex(VFIODevice *vbasedev, int index)
 .count = 0,
 };
 
-ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, _set);
+VDEV_SET_IRQS(vbasedev, _set);
 }
 
 void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index)
@@ -83,7 +83,7 @@ void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int 
index)
 .count = 1,
 };
 
-ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, _set);
+VDEV_SET_IRQS(vbasedev, _set);
 }
 
 void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index)
@@ -96,7 +96,7 @@ void vfio_mask_single_irqindex(VFIODevice *vbasedev, int 
index)
 .count = 1,
 };
 
-ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, _set);
+VDEV_SET_IRQS(vbasedev, _set);
 }
 
 static inline const char *action_to_str(int action)
@@ -177,9 +177,7 @@ int vfio_set_irq_signaling(VFIODevice *vbasedev, int index, 
int subindex,
 pfd = (int32_t *)_set->data;
 *pfd = fd;
 
-if (ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) {
-ret = -errno;
-}
+ret = VDEV_SET_IRQS(vbasedev, irq_set);
 g_free(irq_set);
 
 if (!ret) {
@@ -214,6 +212,7 @@ void vfio_region_write(void *opaque, hwaddr addr,
 uint32_t dword;
 uint64_t qword;
 } buf;
+int ret;
 
 switch (size) {
 case 1:
@@ -233,13 +232,15 @@ void vfio_region_write(void *opaque, hwaddr addr,
 break;
 }
 
-if (pwrite(vbasedev->fd, , size, region->fd_offset + addr) != size) {
+ret = VDEV_REGION_WRITE(vbasedev, region->nr, addr, size, , false);
+if (ret != size) {
+const char *err = ret < 0 ? strerror(-ret) : "short write";
+
 error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64
- ",%d) failed: %m",
+ ",%d) failed: %s",
  __func__, vbasedev->name, region->nr,
- addr, data, size);
+