Re: [PATCH QEMU v25 08/17] vfio: Add save state functions to SaveVMHandlers

2020-06-23 Thread Alex Williamson
On Wed, 24 Jun 2020 02:04:24 +0530
Kirti Wankhede  wrote:

> On 6/23/2020 4:20 AM, Alex Williamson wrote:
> > On Sun, 21 Jun 2020 01:51:17 +0530
> > Kirti Wankhede  wrote:
> >   
> >> Added .save_live_pending, .save_live_iterate and 
> >> .save_live_complete_precopy
> >> functions. These functions handles pre-copy and stop-and-copy phase.
> >>
> >> In _SAVING|_RUNNING device state or pre-copy phase:
> >> - read pending_bytes. If pending_bytes > 0, go through below steps.
> >> - read data_offset - indicates kernel driver to write data to staging
> >>buffer.
> >> - read data_size - amount of data in bytes written by vendor driver in
> >>migration region.
> >> - read data_size bytes of data from data_offset in the migration region.
> >> - Write data packet to file stream as below:
> >> {VFIO_MIG_FLAG_DEV_DATA_STATE, data_size, actual data,
> >> VFIO_MIG_FLAG_END_OF_STATE }
> >>
> >> In _SAVING device state or stop-and-copy phase
> >> a. read config space of device and save to migration file stream. This
> >> doesn't need to be from vendor driver. Any other special config state
> >> from driver can be saved as data in following iteration.
> >> b. read pending_bytes. If pending_bytes > 0, go through below steps.
> >> c. read data_offset - indicates kernel driver to write data to staging
> >> buffer.
> >> d. read data_size - amount of data in bytes written by vendor driver in
> >> migration region.
> >> e. read data_size bytes of data from data_offset in the migration region.
> >> f. Write data packet as below:
> >> {VFIO_MIG_FLAG_DEV_DATA_STATE, data_size, actual data}
> >> g. iterate through steps b to f while (pending_bytes > 0)
> >> h. Write {VFIO_MIG_FLAG_END_OF_STATE}
> >>
> >> When data region is mapped, its user's responsibility to read data from
> >> data_offset of data_size before moving to next steps.
> >>
> >> Signed-off-by: Kirti Wankhede 
> >> Reviewed-by: Neo Jia 
> >> ---
> >>   hw/vfio/migration.c   | 283 
> >> ++
> >>   hw/vfio/trace-events  |   6 +
> >>   include/hw/vfio/vfio-common.h |   1 +
> >>   3 files changed, 290 insertions(+)
> >>
> >> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> >> index 133bb5b1b3b2..ef1150c1ff02 100644
> >> --- a/hw/vfio/migration.c
> >> +++ b/hw/vfio/migration.c
> >> @@ -140,6 +140,168 @@ static int vfio_migration_set_state(VFIODevice 
> >> *vbasedev, uint32_t mask,
> >>   return 0;
> >>   }
> >>   
> >> +static void *get_data_section_size(VFIORegion *region, uint64_t 
> >> data_offset,
> >> +   uint64_t data_size, uint64_t *size)
> >> +{
> >> +void *ptr = NULL;
> >> +int i;
> >> +
> >> +if (!region->mmaps) {
> >> +*size = data_size;
> >> +return ptr;
> >> +}
> >> +
> >> +/* check if data_offset in within sparse mmap areas */
> >> +for (i = 0; i < region->nr_mmaps; i++) {
> >> +VFIOMmap *map = region->mmaps + i;
> >> +
> >> +if ((data_offset >= map->offset) &&
> >> +(data_offset < map->offset + map->size)) {
> >> +ptr = map->mmap + data_offset - map->offset;
> >> +
> >> +if (data_offset + data_size <= map->offset + map->size) {
> >> +*size = data_size;
> >> +} else {
> >> +*size = map->offset + map->size - data_offset;
> >> +}  
> > 
> > Ultimately we take whichever result is smaller, so we could just use:
> > 
> > *size = MIN(data_size, map->offset + map->size - data_offset);
> >   
> >> +break;
> >> +}
> >> +}
> >> +
> >> +if (!ptr) {
> >> +uint64_t limit = 0;
> >> +
> >> +/*
> >> + * data_offset is not within sparse mmap areas, find size of 
> >> non-mapped
> >> + * area. Check through all list since region->mmaps list is not 
> >> sorted.
> >> + */
> >> +for (i = 0; i < region->nr_mmaps; i++) {
> >> +VFIOMmap *map = region->mmaps + i;
> >> +
> >> +if ((data_offset < map->offset) &&
> >> +(!limit || limit > map->offset)) {
> >> +limit = map->offset;
> >> +}  
> > 
> > We could have done this in an else branch of the previous loop to avoid
> > walking the entries twice.
> >   
> 
> Ok. updating with above 2 changes.
> 
> >> +}
> >> +
> >> +*size = limit ? limit - data_offset : data_size;
> >> +}
> >> +return ptr;
> >> +}
> >> +
> >> +static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev)
> >> +{
> >> +VFIOMigration *migration = vbasedev->migration;
> >> +VFIORegion *region = &migration->region;
> >> +uint64_t data_offset = 0, data_size = 0, size;
> >> +int ret;
> >> +
> >> +ret = pread(vbasedev->fd, &data_offset, sizeof(data_offset),
> >> +region->fd_offset + offsetof(struct 
> >> vfio_device_migration_info,
> >> + data_offset));
> >> +if (ret != 

Re: [PATCH QEMU v25 08/17] vfio: Add save state functions to SaveVMHandlers

2020-06-23 Thread Kirti Wankhede




On 6/23/2020 4:20 AM, Alex Williamson wrote:

On Sun, 21 Jun 2020 01:51:17 +0530
Kirti Wankhede  wrote:


Added .save_live_pending, .save_live_iterate and .save_live_complete_precopy
functions. These functions handles pre-copy and stop-and-copy phase.

In _SAVING|_RUNNING device state or pre-copy phase:
- read pending_bytes. If pending_bytes > 0, go through below steps.
- read data_offset - indicates kernel driver to write data to staging
   buffer.
- read data_size - amount of data in bytes written by vendor driver in
   migration region.
- read data_size bytes of data from data_offset in the migration region.
- Write data packet to file stream as below:
{VFIO_MIG_FLAG_DEV_DATA_STATE, data_size, actual data,
VFIO_MIG_FLAG_END_OF_STATE }

In _SAVING device state or stop-and-copy phase
a. read config space of device and save to migration file stream. This
doesn't need to be from vendor driver. Any other special config state
from driver can be saved as data in following iteration.
b. read pending_bytes. If pending_bytes > 0, go through below steps.
c. read data_offset - indicates kernel driver to write data to staging
buffer.
d. read data_size - amount of data in bytes written by vendor driver in
migration region.
e. read data_size bytes of data from data_offset in the migration region.
f. Write data packet as below:
{VFIO_MIG_FLAG_DEV_DATA_STATE, data_size, actual data}
g. iterate through steps b to f while (pending_bytes > 0)
h. Write {VFIO_MIG_FLAG_END_OF_STATE}

When data region is mapped, its user's responsibility to read data from
data_offset of data_size before moving to next steps.

Signed-off-by: Kirti Wankhede 
Reviewed-by: Neo Jia 
---
  hw/vfio/migration.c   | 283 ++
  hw/vfio/trace-events  |   6 +
  include/hw/vfio/vfio-common.h |   1 +
  3 files changed, 290 insertions(+)

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 133bb5b1b3b2..ef1150c1ff02 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -140,6 +140,168 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, 
uint32_t mask,
  return 0;
  }
  
+static void *get_data_section_size(VFIORegion *region, uint64_t data_offset,

+   uint64_t data_size, uint64_t *size)
+{
+void *ptr = NULL;
+int i;
+
+if (!region->mmaps) {
+*size = data_size;
+return ptr;
+}
+
+/* check if data_offset in within sparse mmap areas */
+for (i = 0; i < region->nr_mmaps; i++) {
+VFIOMmap *map = region->mmaps + i;
+
+if ((data_offset >= map->offset) &&
+(data_offset < map->offset + map->size)) {
+ptr = map->mmap + data_offset - map->offset;
+
+if (data_offset + data_size <= map->offset + map->size) {
+*size = data_size;
+} else {
+*size = map->offset + map->size - data_offset;
+}


Ultimately we take whichever result is smaller, so we could just use:

*size = MIN(data_size, map->offset + map->size - data_offset);


+break;
+}
+}
+
+if (!ptr) {
+uint64_t limit = 0;
+
+/*
+ * data_offset is not within sparse mmap areas, find size of non-mapped
+ * area. Check through all list since region->mmaps list is not sorted.
+ */
+for (i = 0; i < region->nr_mmaps; i++) {
+VFIOMmap *map = region->mmaps + i;
+
+if ((data_offset < map->offset) &&
+(!limit || limit > map->offset)) {
+limit = map->offset;
+}


We could have done this in an else branch of the previous loop to avoid
walking the entries twice.



Ok. updating with above 2 changes.


+}
+
+*size = limit ? limit - data_offset : data_size;
+}
+return ptr;
+}
+
+static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev)
+{
+VFIOMigration *migration = vbasedev->migration;
+VFIORegion *region = &migration->region;
+uint64_t data_offset = 0, data_size = 0, size;
+int ret;
+
+ret = pread(vbasedev->fd, &data_offset, sizeof(data_offset),
+region->fd_offset + offsetof(struct vfio_device_migration_info,
+ data_offset));
+if (ret != sizeof(data_offset)) {
+error_report("%s: Failed to get migration buffer data offset %d",
+ vbasedev->name, ret);
+return -EINVAL;
+}
+
+ret = pread(vbasedev->fd, &data_size, sizeof(data_size),
+region->fd_offset + offsetof(struct vfio_device_migration_info,
+ data_size));
+if (ret != sizeof(data_size)) {
+error_report("%s: Failed to get migration buffer data size %d",
+ vbasedev->name, ret);
+return -EINVAL;
+}
+
+trace_vfio_save_buffer(vbasedev->name, data_offset, data_size,
+   migration->pending_bytes);
+
+

Re: [PATCH QEMU v25 08/17] vfio: Add save state functions to SaveVMHandlers

2020-06-22 Thread Alex Williamson
On Sun, 21 Jun 2020 01:51:17 +0530
Kirti Wankhede  wrote:

> Added .save_live_pending, .save_live_iterate and .save_live_complete_precopy
> functions. These functions handles pre-copy and stop-and-copy phase.
> 
> In _SAVING|_RUNNING device state or pre-copy phase:
> - read pending_bytes. If pending_bytes > 0, go through below steps.
> - read data_offset - indicates kernel driver to write data to staging
>   buffer.
> - read data_size - amount of data in bytes written by vendor driver in
>   migration region.
> - read data_size bytes of data from data_offset in the migration region.
> - Write data packet to file stream as below:
> {VFIO_MIG_FLAG_DEV_DATA_STATE, data_size, actual data,
> VFIO_MIG_FLAG_END_OF_STATE }
> 
> In _SAVING device state or stop-and-copy phase
> a. read config space of device and save to migration file stream. This
>doesn't need to be from vendor driver. Any other special config state
>from driver can be saved as data in following iteration.
> b. read pending_bytes. If pending_bytes > 0, go through below steps.
> c. read data_offset - indicates kernel driver to write data to staging
>buffer.
> d. read data_size - amount of data in bytes written by vendor driver in
>migration region.
> e. read data_size bytes of data from data_offset in the migration region.
> f. Write data packet as below:
>{VFIO_MIG_FLAG_DEV_DATA_STATE, data_size, actual data}
> g. iterate through steps b to f while (pending_bytes > 0)
> h. Write {VFIO_MIG_FLAG_END_OF_STATE}
> 
> When data region is mapped, its user's responsibility to read data from
> data_offset of data_size before moving to next steps.
> 
> Signed-off-by: Kirti Wankhede 
> Reviewed-by: Neo Jia 
> ---
>  hw/vfio/migration.c   | 283 
> ++
>  hw/vfio/trace-events  |   6 +
>  include/hw/vfio/vfio-common.h |   1 +
>  3 files changed, 290 insertions(+)
> 
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index 133bb5b1b3b2..ef1150c1ff02 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -140,6 +140,168 @@ static int vfio_migration_set_state(VFIODevice 
> *vbasedev, uint32_t mask,
>  return 0;
>  }
>  
> +static void *get_data_section_size(VFIORegion *region, uint64_t data_offset,
> +   uint64_t data_size, uint64_t *size)
> +{
> +void *ptr = NULL;
> +int i;
> +
> +if (!region->mmaps) {
> +*size = data_size;
> +return ptr;
> +}
> +
> +/* check if data_offset in within sparse mmap areas */
> +for (i = 0; i < region->nr_mmaps; i++) {
> +VFIOMmap *map = region->mmaps + i;
> +
> +if ((data_offset >= map->offset) &&
> +(data_offset < map->offset + map->size)) {
> +ptr = map->mmap + data_offset - map->offset;
> +
> +if (data_offset + data_size <= map->offset + map->size) {
> +*size = data_size;
> +} else {
> +*size = map->offset + map->size - data_offset;
> +}

Ultimately we take whichever result is smaller, so we could just use:

*size = MIN(data_size, map->offset + map->size - data_offset);

> +break;
> +}
> +}
> +
> +if (!ptr) {
> +uint64_t limit = 0;
> +
> +/*
> + * data_offset is not within sparse mmap areas, find size of 
> non-mapped
> + * area. Check through all list since region->mmaps list is not 
> sorted.
> + */
> +for (i = 0; i < region->nr_mmaps; i++) {
> +VFIOMmap *map = region->mmaps + i;
> +
> +if ((data_offset < map->offset) &&
> +(!limit || limit > map->offset)) {
> +limit = map->offset;
> +}

We could have done this in an else branch of the previous loop to avoid
walking the entries twice.

> +}
> +
> +*size = limit ? limit - data_offset : data_size;
> +}
> +return ptr;
> +}
> +
> +static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev)
> +{
> +VFIOMigration *migration = vbasedev->migration;
> +VFIORegion *region = &migration->region;
> +uint64_t data_offset = 0, data_size = 0, size;
> +int ret;
> +
> +ret = pread(vbasedev->fd, &data_offset, sizeof(data_offset),
> +region->fd_offset + offsetof(struct 
> vfio_device_migration_info,
> + data_offset));
> +if (ret != sizeof(data_offset)) {
> +error_report("%s: Failed to get migration buffer data offset %d",
> + vbasedev->name, ret);
> +return -EINVAL;
> +}
> +
> +ret = pread(vbasedev->fd, &data_size, sizeof(data_size),
> +region->fd_offset + offsetof(struct 
> vfio_device_migration_info,
> + data_size));
> +if (ret != sizeof(data_size)) {
> +error_report("%s: Failed to get migration buffer data size %d",
> + vbasedev->name, ret);
> +

[PATCH QEMU v25 08/17] vfio: Add save state functions to SaveVMHandlers

2020-06-20 Thread Kirti Wankhede
Added .save_live_pending, .save_live_iterate and .save_live_complete_precopy
functions. These functions handle the pre-copy and stop-and-copy phases.

In _SAVING|_RUNNING device state or pre-copy phase:
- read pending_bytes. If pending_bytes > 0, go through below steps.
- read data_offset - indicates kernel driver to write data to staging
  buffer.
- read data_size - amount of data in bytes written by vendor driver in
  migration region.
- read data_size bytes of data from data_offset in the migration region.
- Write data packet to file stream as below:
{VFIO_MIG_FLAG_DEV_DATA_STATE, data_size, actual data,
VFIO_MIG_FLAG_END_OF_STATE }

In _SAVING device state or stop-and-copy phase
a. read config space of device and save to migration file stream. This
   doesn't need to be from vendor driver. Any other special config state
   from driver can be saved as data in following iteration.
b. read pending_bytes. If pending_bytes > 0, go through below steps.
c. read data_offset - indicates kernel driver to write data to staging
   buffer.
d. read data_size - amount of data in bytes written by vendor driver in
   migration region.
e. read data_size bytes of data from data_offset in the migration region.
f. Write data packet as below:
   {VFIO_MIG_FLAG_DEV_DATA_STATE, data_size, actual data}
g. iterate through steps b to f while (pending_bytes > 0)
h. Write {VFIO_MIG_FLAG_END_OF_STATE}

When the data region is mapped, it is the user's responsibility to read
data_size bytes of data from data_offset before moving to the next steps.

Signed-off-by: Kirti Wankhede 
Reviewed-by: Neo Jia 
---
 hw/vfio/migration.c   | 283 ++
 hw/vfio/trace-events  |   6 +
 include/hw/vfio/vfio-common.h |   1 +
 3 files changed, 290 insertions(+)

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 133bb5b1b3b2..ef1150c1ff02 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -140,6 +140,168 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, 
uint32_t mask,
 return 0;
 }
 
+static void *get_data_section_size(VFIORegion *region, uint64_t data_offset,
+   uint64_t data_size, uint64_t *size)
+{
+void *ptr = NULL;
+int i;
+
+if (!region->mmaps) {
+*size = data_size;
+return ptr;
+}
+
+/* check if data_offset is within sparse mmap areas */
+for (i = 0; i < region->nr_mmaps; i++) {
+VFIOMmap *map = region->mmaps + i;
+
+if ((data_offset >= map->offset) &&
+(data_offset < map->offset + map->size)) {
+ptr = map->mmap + data_offset - map->offset;
+
+if (data_offset + data_size <= map->offset + map->size) {
+*size = data_size;
+} else {
+*size = map->offset + map->size - data_offset;
+}
+break;
+}
+}
+
+if (!ptr) {
+uint64_t limit = 0;
+
+/*
+ * data_offset is not within sparse mmap areas, find size of non-mapped
+ * area. Check through all list since region->mmaps list is not sorted.
+ */
+for (i = 0; i < region->nr_mmaps; i++) {
+VFIOMmap *map = region->mmaps + i;
+
+if ((data_offset < map->offset) &&
+(!limit || limit > map->offset)) {
+limit = map->offset;
+}
+}
+
+*size = limit ? limit - data_offset : data_size;
+}
+return ptr;
+}
+
+static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev)
+{
+VFIOMigration *migration = vbasedev->migration;
+VFIORegion *region = &migration->region;
+uint64_t data_offset = 0, data_size = 0, size;
+int ret;
+
+ret = pread(vbasedev->fd, &data_offset, sizeof(data_offset),
+region->fd_offset + offsetof(struct vfio_device_migration_info,
+ data_offset));
+if (ret != sizeof(data_offset)) {
+error_report("%s: Failed to get migration buffer data offset %d",
+ vbasedev->name, ret);
+return -EINVAL;
+}
+
+ret = pread(vbasedev->fd, &data_size, sizeof(data_size),
+region->fd_offset + offsetof(struct vfio_device_migration_info,
+ data_size));
+if (ret != sizeof(data_size)) {
+error_report("%s: Failed to get migration buffer data size %d",
+ vbasedev->name, ret);
+return -EINVAL;
+}
+
+trace_vfio_save_buffer(vbasedev->name, data_offset, data_size,
+   migration->pending_bytes);
+
+qemu_put_be64(f, data_size);
+size = data_size;
+
+while (size) {
+void *buf = NULL;
+bool buffer_mmaped;
+uint64_t sec_size;
+
+buf = get_data_section_size(region, data_offset, size, &sec_size);
+
+buffer_mmaped = (buf != NULL);
+
+if (!buffer_mmaped) {
+buf = g_try_malloc(sec_size);
+if (!buf) {
+