Re: [PATCH v9 QEMU 11/15] vfio: Add save state functions to SaveVMHandlers

2019-11-13 Thread Alex Williamson
On Tue, 12 Nov 2019 22:35:20 +0530
Kirti Wankhede  wrote:

> Added .save_live_pending, .save_live_iterate and .save_live_complete_precopy
> functions. These functions handle the pre-copy and stop-and-copy phases.
> 
> In _SAVING|_RUNNING device state or pre-copy phase:
> - read pending_bytes. If pending_bytes > 0, go through below steps.
> - read data_offset - indicates kernel driver to write data to staging
>   buffer.
> - read data_size - amount of data in bytes written by vendor driver in
>   migration region.
> - read data_size bytes of data from data_offset in the migration region.
> - Write data packet to file stream as below:
> {VFIO_MIG_FLAG_DEV_DATA_STATE, data_size, actual data,
> VFIO_MIG_FLAG_END_OF_STATE }
> 
> In _SAVING device state or stop-and-copy phase
> a. read config space of device and save to migration file stream. This
>    doesn't need to be from vendor driver. Any other special config state
>    from driver can be saved as data in following iteration.
> b. read pending_bytes. If pending_bytes > 0, go through below steps.
> c. read data_offset - indicates kernel driver to write data to staging
>    buffer.
> d. read data_size - amount of data in bytes written by vendor driver in
>    migration region.
> e. read data_size bytes of data from data_offset in the migration region.
> f. Write data packet as below:
>    {VFIO_MIG_FLAG_DEV_DATA_STATE, data_size, actual data}
> g. iterate through steps b to f while (pending_bytes > 0)
> h. Write {VFIO_MIG_FLAG_END_OF_STATE}
> 
> When data region is mapped, it's the user's responsibility to read data from

s/mapped/made available/

"mapped" is confusing given the mmap'd features.

> data_offset of data_size before moving to next steps.
>
> Signed-off-by: Kirti Wankhede 
> Reviewed-by: Neo Jia 
> ---
>  hw/vfio/migration.c  | 245 
> ++-
>  hw/vfio/trace-events |   6 ++
>  2 files changed, 250 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index 48aac6d29876..f890e864e174 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -120,6 +120,137 @@ static int vfio_migration_set_state(VFIODevice 
> *vbasedev, uint32_t set_flags,
>  return 0;
>  }
>  
> +static void *find_data_region(VFIORegion *region,
> +  uint64_t data_offset,
> +  uint64_t data_size)
> +{
> +void *ptr = NULL;
> +int i;
> +
> +for (i = 0; i < region->nr_mmaps; i++) {
> +if ((data_offset >= region->mmaps[i].offset) &&
> +(data_offset < region->mmaps[i].offset + region->mmaps[i].size) 
> &&
> +(data_size <= region->mmaps[i].size)) {

data_offset is determined to live somewhere within the mmap and
data_size is independently determined to be smaller than the entire
mmaps size.  This is broken.

> +ptr = region->mmaps[i].mmap + (data_offset -
> +   region->mmaps[i].offset);

If the data offset is mmap'd, this gives us a pointer to the start, but
we have no idea if the entire range is accessible via this pointer, nor
does the API require it to be.

> +break;
> +}
> +}
> +return ptr;
> +}
> +
> +static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev)
> +{
> +VFIOMigration *migration = vbasedev->migration;
> +VFIORegion *region = &migration->region;
> +uint64_t data_offset = 0, data_size = 0;
> +int ret;
> +
> +ret = pread(vbasedev->fd, &data_offset, sizeof(data_offset),
> +region->fd_offset + offsetof(struct 
> vfio_device_migration_info,
> + data_offset));
> +if (ret != sizeof(data_offset)) {
> +error_report("%s: Failed to get migration buffer data offset %d",
> + vbasedev->name, ret);
> +return -EINVAL;
> +}
> +
> +ret = pread(vbasedev->fd, &data_size, sizeof(data_size),
> +region->fd_offset + offsetof(struct 
> vfio_device_migration_info,
> + data_size));
> +if (ret != sizeof(data_size)) {
> +error_report("%s: Failed to get migration buffer data size %d",
> + vbasedev->name, ret);
> +return -EINVAL;
> +}
> +
> +if (data_size > 0) {
> +void *buf = NULL;
> +bool buffer_mmaped;
> +
> +if (region->mmaps) {
> +buf = find_data_region(region, data_offset, data_size);
> +}
> +
> +buffer_mmaped = (buf != NULL) ? true : false;
> +
> +if (!buffer_mmaped) {
> +buf = g_try_malloc0(data_size);
> +if (!buf) {
> +error_report("%s: Error allocating buffer ", __func__);
> +return -ENOMEM;
> +}
> +
> +ret = pread(vbasedev->fd, buf, data_size,
> +region->fd_offset + data_offset);
> +if (ret != data_size) {
> +

[PATCH v9 QEMU 11/15] vfio: Add save state functions to SaveVMHandlers

2019-11-12 Thread Kirti Wankhede
Added .save_live_pending, .save_live_iterate and .save_live_complete_precopy
functions. These functions handle the pre-copy and stop-and-copy phases.

In _SAVING|_RUNNING device state or pre-copy phase:
- read pending_bytes. If pending_bytes > 0, go through below steps.
- read data_offset - indicates kernel driver to write data to staging
  buffer.
- read data_size - amount of data in bytes written by vendor driver in
  migration region.
- read data_size bytes of data from data_offset in the migration region.
- Write data packet to file stream as below:
{VFIO_MIG_FLAG_DEV_DATA_STATE, data_size, actual data,
VFIO_MIG_FLAG_END_OF_STATE }

In _SAVING device state or stop-and-copy phase
a. read config space of device and save to migration file stream. This
   doesn't need to be from vendor driver. Any other special config state
   from driver can be saved as data in following iteration.
b. read pending_bytes. If pending_bytes > 0, go through below steps.
c. read data_offset - indicates kernel driver to write data to staging
   buffer.
d. read data_size - amount of data in bytes written by vendor driver in
   migration region.
e. read data_size bytes of data from data_offset in the migration region.
f. Write data packet as below:
   {VFIO_MIG_FLAG_DEV_DATA_STATE, data_size, actual data}
g. iterate through steps b to f while (pending_bytes > 0)
h. Write {VFIO_MIG_FLAG_END_OF_STATE}

When data region is mapped, it's the user's responsibility to read data from
data_offset of data_size before moving to next steps.

Signed-off-by: Kirti Wankhede 
Reviewed-by: Neo Jia 
---
 hw/vfio/migration.c  | 245 ++-
 hw/vfio/trace-events |   6 ++
 2 files changed, 250 insertions(+), 1 deletion(-)

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 48aac6d29876..f890e864e174 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -120,6 +120,137 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, 
uint32_t set_flags,
 return 0;
 }
 
+static void *find_data_region(VFIORegion *region,
+  uint64_t data_offset,
+  uint64_t data_size)
+{
+void *ptr = NULL;
+int i;
+
+for (i = 0; i < region->nr_mmaps; i++) {
+if ((data_offset >= region->mmaps[i].offset) &&
+(data_offset < region->mmaps[i].offset + region->mmaps[i].size) &&
+(data_size <= region->mmaps[i].size)) {
+ptr = region->mmaps[i].mmap + (data_offset -
+   region->mmaps[i].offset);
+break;
+}
+}
+return ptr;
+}
+
+static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev)
+{
+VFIOMigration *migration = vbasedev->migration;
+VFIORegion *region = &migration->region;
+uint64_t data_offset = 0, data_size = 0;
+int ret;
+
+ret = pread(vbasedev->fd, &data_offset, sizeof(data_offset),
+region->fd_offset + offsetof(struct vfio_device_migration_info,
+ data_offset));
+if (ret != sizeof(data_offset)) {
+error_report("%s: Failed to get migration buffer data offset %d",
+ vbasedev->name, ret);
+return -EINVAL;
+}
+
+ret = pread(vbasedev->fd, &data_size, sizeof(data_size),
+region->fd_offset + offsetof(struct vfio_device_migration_info,
+ data_size));
+if (ret != sizeof(data_size)) {
+error_report("%s: Failed to get migration buffer data size %d",
+ vbasedev->name, ret);
+return -EINVAL;
+}
+
+if (data_size > 0) {
+void *buf = NULL;
+bool buffer_mmaped;
+
+if (region->mmaps) {
+buf = find_data_region(region, data_offset, data_size);
+}
+
+buffer_mmaped = (buf != NULL) ? true : false;
+
+if (!buffer_mmaped) {
+buf = g_try_malloc0(data_size);
+if (!buf) {
+error_report("%s: Error allocating buffer ", __func__);
+return -ENOMEM;
+}
+
+ret = pread(vbasedev->fd, buf, data_size,
+region->fd_offset + data_offset);
+if (ret != data_size) {
+error_report("%s: Failed to get migration data %d",
+ vbasedev->name, ret);
+g_free(buf);
+return -EINVAL;
+}
+}
+
+qemu_put_be64(f, data_size);
+qemu_put_buffer(f, buf, data_size);
+
+if (!buffer_mmaped) {
+g_free(buf);
+}
+} else {
+qemu_put_be64(f, data_size);
+}
+
+trace_vfio_save_buffer(vbasedev->name, data_offset, data_size,
+   migration->pending_bytes);
+
+ret = qemu_file_get_error(f);
+if (ret) {
+return ret;
+}
+
+return data_size;
+}
+
+static int vfio_update_pending(VFIODevice *vbasedev)
+{
+VFIOMigration