> -----Original Message----- > From: Qemu-devel <qemu-devel- > bounces+thanos.makatos=nutanix....@nongnu.org> On Behalf Of John > Johnson > Sent: 12 January 2022 00:44 > To: qemu-devel@nongnu.org > Subject: [RFC v4 20/21] vfio-user: migration support > > Signed-off-by: John G Johnson <john.g.john...@oracle.com> > Signed-off-by: Elena Ufimtseva <elena.ufimts...@oracle.com> > Signed-off-by: Jagannathan Raman <jag.ra...@oracle.com> > --- > hw/vfio/user-protocol.h | 18 +++++++++++++++++ > hw/vfio/migration.c | 30 +++++++++++++-------------- > hw/vfio/pci.c | 7 +++++++ > hw/vfio/user.c | 54 > +++++++++++++++++++++++++++++++++++++++++++++++++ > 4 files changed, 93 insertions(+), 16 deletions(-) > > diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h > index 8932311..abe7002 100644 > --- a/hw/vfio/user-protocol.h > +++ b/hw/vfio/user-protocol.h > @@ -193,6 +193,10 @@ typedef struct { > char data[]; > } VFIOUserDMARW; > > +/* > + * VFIO_USER_DIRTY_PAGES > + */ > + > /*imported from struct vfio_bitmap */ > typedef struct { > uint64_t pgsize; > @@ -200,4 +204,18 @@ typedef struct { > char data[]; > } VFIOUserBitmap; > > +/* imported from struct vfio_iommu_type1_dirty_bitmap_get */ > +typedef struct { > + uint64_t iova; > + uint64_t size; > + VFIOUserBitmap bitmap; > +} VFIOUserBitmapRange; > + > +/* imported from struct vfio_iommu_type1_dirty_bitmap */ > +typedef struct { > + VFIOUserHdr hdr; > + uint32_t argsz; > + uint32_t flags; > +} VFIOUserDirtyPages; > + > #endif /* VFIO_USER_PROTOCOL_H */ > diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c > index ff6b45d..df63f5c 100644 > --- a/hw/vfio/migration.c > +++ b/hw/vfio/migration.c > @@ -27,6 +27,7 @@ > #include "pci.h" > #include "trace.h" > #include "hw/hw.h" > +#include "user.h" > > /* > * Flags to be used as unique delimiters for VFIO devices in the migration > @@ -49,11 +50,13 @@ static int64_t bytes_transferred; > static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count, > off_t off, bool iswrite) > { > + VFIORegion *region = &vbasedev->migration->region; > int ret; > > - ret = iswrite ? pwrite(vbasedev->fd, val, count, off) : > - pread(vbasedev->fd, val, count, off); > - if (ret < count) { > + ret = iswrite ? > + VDEV_REGION_WRITE(vbasedev, region->nr, off, count, val, false) : > + VDEV_REGION_READ(vbasedev, region->nr, off, count, val); > + if (ret < count) { > error_report("vfio_mig_%s %d byte %s: failed at offset 0x%" > HWADDR_PRIx", err: %s", iswrite ? "write" : "read", > count, > vbasedev->name, off, strerror(errno)); > @@ -111,9 +114,7 @@ static int vfio_migration_set_state(VFIODevice > *vbasedev, uint32_t mask, > uint32_t value) > { > VFIOMigration *migration = vbasedev->migration; > - VFIORegion *region = &migration->region; > - off_t dev_state_off = region->fd_offset + > - VFIO_MIG_STRUCT_OFFSET(device_state); > + off_t dev_state_off = VFIO_MIG_STRUCT_OFFSET(device_state); > uint32_t device_state; > int ret; > > @@ -201,13 +202,13 @@ static int vfio_save_buffer(QEMUFile *f, VFIODevice > *vbasedev, uint64_t *size) > int ret; > > ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset), > - region->fd_offset + > VFIO_MIG_STRUCT_OFFSET(data_offset)); > + VFIO_MIG_STRUCT_OFFSET(data_offset)); > if (ret < 0) { > return ret; > } > > ret = vfio_mig_read(vbasedev, &data_size, sizeof(data_size), > - region->fd_offset + > VFIO_MIG_STRUCT_OFFSET(data_size)); > + VFIO_MIG_STRUCT_OFFSET(data_size)); > if (ret < 0) { > return ret; > } > @@ -233,8 +234,7 @@ static int vfio_save_buffer(QEMUFile *f, VFIODevice > *vbasedev, uint64_t *size) > } > buf_allocated = true; > > - ret = vfio_mig_read(vbasedev, buf, sec_size, > - region->fd_offset + data_offset); > + ret = vfio_mig_read(vbasedev, buf, sec_size, data_offset); > if (ret < 0) { > g_free(buf); > return ret; > @@ -269,7 +269,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice > *vbasedev, > > do { > ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset), > - region->fd_offset + > VFIO_MIG_STRUCT_OFFSET(data_offset)); > + VFIO_MIG_STRUCT_OFFSET(data_offset)); > if (ret < 0) { > return ret; > } > @@ -309,8 +309,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice > *vbasedev, > qemu_get_buffer(f, buf, sec_size); > > if (buf_alloc) { > - ret = vfio_mig_write(vbasedev, buf, sec_size, > - region->fd_offset + data_offset); > + ret = vfio_mig_write(vbasedev, buf, sec_size, data_offset); > g_free(buf); > > if (ret < 0) { > @@ -322,7 +321,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice > *vbasedev, > } > > ret = vfio_mig_write(vbasedev, &report_size, sizeof(report_size), > - region->fd_offset + > VFIO_MIG_STRUCT_OFFSET(data_size)); > + VFIO_MIG_STRUCT_OFFSET(data_size)); > if (ret < 0) { > return ret; > } > @@ -334,12 +333,11 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice > *vbasedev, > static int vfio_update_pending(VFIODevice *vbasedev) > { > VFIOMigration *migration = vbasedev->migration; > - VFIORegion *region = &migration->region; > uint64_t pending_bytes = 0; > int ret; > > ret = vfio_mig_read(vbasedev, &pending_bytes, sizeof(pending_bytes), > - region->fd_offset + > VFIO_MIG_STRUCT_OFFSET(pending_bytes)); > + VFIO_MIG_STRUCT_OFFSET(pending_bytes)); > if (ret < 0) { > migration->pending_bytes = 0; > return ret; > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > index d47b98e..598e9ed 100644 > --- a/hw/vfio/pci.c > +++ b/hw/vfio/pci.c > @@ -3677,6 +3677,13 @@ static void vfio_user_pci_realize(PCIDevice *pdev, > Error **errp) > goto out_teardown; > } > > + if (!pdev->failover_pair_id) { > + ret = vfio_migration_probe(&vdev->vbasedev, errp); > + if (ret) { > + error_report("%s: Migration disabled", vdev->vbasedev.name); > + } > + } > + > vfio_register_err_notifier(vdev); > vfio_register_req_notifier(vdev); > > diff --git a/hw/vfio/user.c b/hw/vfio/user.c > index 33d8f06..2eac62a 100644 > --- a/hw/vfio/user.c > +++ b/hw/vfio/user.c > @@ -1410,6 +1410,52 @@ void vfio_user_reset(VFIOProxy *proxy) > } > } > > +static int vfio_user_dirty_bitmap(VFIOProxy *proxy, > + struct vfio_iommu_type1_dirty_bitmap *cmd, > + struct vfio_iommu_type1_dirty_bitmap_get > + *dbitmap) > +{ > + g_autofree struct { > + VFIOUserDirtyPages msg; > + VFIOUserBitmapRange range; > + } *msgp = NULL; > + int msize, rsize; > + > + /* > + * If just the command is sent, the returned bitmap isn't needed. > + * The bitmap structs are different from the ioctl() versions, > + * ioctl() returns the bitmap in a local VA > + */ > + if (dbitmap != NULL) { > + msize = sizeof(*msgp); > + rsize = msize + dbitmap->bitmap.size; > + msgp = g_malloc0(rsize); > + msgp->range.iova = dbitmap->iova; > + msgp->range.size = dbitmap->size; > + msgp->range.bitmap.pgsize = dbitmap->bitmap.pgsize; > + msgp->range.bitmap.size = dbitmap->bitmap.size; > + } else { > + msize = rsize = sizeof(VFIOUserDirtyPages); > + msgp = g_malloc0(rsize); > + } > + > + vfio_user_request_msg(&msgp->msg.hdr, VFIO_USER_DIRTY_PAGES, msize, > 0); > + msgp->msg.argsz = rsize - sizeof(VFIOUserHdr); > + msgp->msg.flags = cmd->flags; > + > + vfio_user_send_wait(proxy, &msgp->msg.hdr, NULL, rsize, false); > + if (msgp->msg.hdr.flags & VFIO_USER_ERROR) { > + return -msgp->msg.hdr.error_reply; > + }
We need to check argsz in the response, in which case the client needs to retry with a larger argsz. > + > + if (dbitmap != NULL) { > + memcpy(dbitmap->bitmap.data, &msgp->range.bitmap.data, > + dbitmap->bitmap.size); > + } > + > + return 0; > +} > + > > /* > * Socket-based io_ops > @@ -1530,6 +1576,13 @@ static int vfio_user_io_dma_unmap(VFIOContainer > *container, > container->async_ops); > } > > +static int vfio_user_io_dirty_bitmap(VFIOContainer *container, > + struct vfio_iommu_type1_dirty_bitmap *bitmap, > + struct vfio_iommu_type1_dirty_bitmap_get *range) > +{ > + return vfio_user_dirty_bitmap(container->proxy, bitmap, range); > +} > + > static void vfio_user_io_wait_commit(VFIOContainer *container) > { > vfio_user_wait_reqs(container->proxy); > @@ -1538,5 +1591,6 @@ static void vfio_user_io_wait_commit(VFIOContainer > *container) > VFIOContIO vfio_cont_io_sock = { > .dma_map = vfio_user_io_dma_map, > .dma_unmap = vfio_user_io_dma_unmap, > + .dirty_bitmap = vfio_user_io_dirty_bitmap, > .wait_commit = vfio_user_io_wait_commit, > }; > -- > 1.8.3.1 >