On 5/21/2020 7:48 PM, Dr. David Alan Gilbert wrote:
* Kirti Wankhede (kwankh...@nvidia.com) wrote:
Define flags to be used as delimeter in migration file stream.
Added .save_setup and .save_cleanup functions. Mapped & unmapped migration
region from these functions at source during saving or pre-copy phase.
Set VFIO device state depending on VM's state. During live migration, VM is
running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO
device. During save-restore, VM is paused, _SAVING state is set for VFIO device.

Signed-off-by: Kirti Wankhede <kwankh...@nvidia.com>
Reviewed-by: Neo Jia <c...@nvidia.com>
---
  hw/vfio/migration.c  | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++
  hw/vfio/trace-events |  2 ++
  2 files changed, 75 insertions(+)

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index c2f5564b51c3..773c8d16b1c1 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -8,12 +8,14 @@
   */
#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
  #include <linux/vfio.h>
#include "sysemu/runstate.h"
  #include "hw/vfio/vfio-common.h"
  #include "cpu.h"
  #include "migration/migration.h"
+#include "migration/vmstate.h"
  #include "migration/qemu-file.h"
  #include "migration/register.h"
  #include "migration/blocker.h"
@@ -24,6 +26,17 @@
  #include "pci.h"
  #include "trace.h"
+/*
+ * Flags used as delimiter:
+ * 0xffffffff => MSB 32-bit all 1s
+ * 0xef10     => emulated (virtual) function IO
+ * 0x0000     => 16-bits reserved for flags
+ */
+#define VFIO_MIG_FLAG_END_OF_STATE      (0xffffffffef100001ULL)
+#define VFIO_MIG_FLAG_DEV_CONFIG_STATE  (0xffffffffef100002ULL)
+#define VFIO_MIG_FLAG_DEV_SETUP_STATE   (0xffffffffef100003ULL)
+#define VFIO_MIG_FLAG_DEV_DATA_STATE    (0xffffffffef100004ULL)
+
  static void vfio_migration_region_exit(VFIODevice *vbasedev)
  {
      VFIOMigration *migration = vbasedev->migration;
@@ -126,6 +139,64 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, 
uint32_t mask,
      return 0;
  }
+/* ---------------------------------------------------------------------- */
+
+static int vfio_save_setup(QEMUFile *f, void *opaque)
+{
+    VFIODevice *vbasedev = opaque;
+    VFIOMigration *migration = vbasedev->migration;
+    int ret;
+
+    trace_vfio_save_setup(vbasedev->name);
+
+    qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
+
+    if (migration->region.mmaps) {
+        qemu_mutex_lock_iothread();
+        ret = vfio_region_mmap(&migration->region);
+        qemu_mutex_unlock_iothread();
+        if (ret) {
+            error_report("%s: Failed to mmap VFIO migration region %d: %s",
+                         vbasedev->name, migration->region.index,
+                         strerror(-ret));
+            return ret;
+        }
+    }
+
+    ret = vfio_migration_set_state(vbasedev, ~0, VFIO_DEVICE_STATE_SAVING);
+    if (ret) {
+        error_report("%s: Failed to set state SAVING", vbasedev->name);
+        return ret;
+    }
+
+    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
+
+    ret = qemu_file_get_error(f);
+    if (ret) {
+        return ret;
+    }
+
+    return 0;
+}
+
+static void vfio_save_cleanup(void *opaque)
+{
+    VFIODevice *vbasedev = opaque;
+    VFIOMigration *migration = vbasedev->migration;
+
+    if (migration->region.mmaps) {
+        vfio_region_unmap(&migration->region);
+    }
+    trace_vfio_save_cleanup(vbasedev->name);
+}
+
+static SaveVMHandlers savevm_vfio_handlers = {
+    .save_setup = vfio_save_setup,
+    .save_cleanup = vfio_save_cleanup,
+};
+
+/* ---------------------------------------------------------------------- */
+
  static void vfio_vmstate_change(void *opaque, int running, RunState state)
  {
      VFIODevice *vbasedev = opaque;
@@ -192,6 +263,8 @@ static int vfio_migration_init(VFIODevice *vbasedev,
          return ret;
      }
+ register_savevm_live("vfio", VMSTATE_INSTANCE_ID_ANY, 1,
+                         &savevm_vfio_handlers, vbasedev);

Hi,
   This is still the only bit which worries me, and I saw your note
saying you'd tested it; to calm my nerves, can you run with the
'qemu_loadvm_state_section_startfull' trace enabled with 2 devices
and show me the output and qemu command line?
I'm trying to figure out how they end up represented in the stream.


Created mtty devices for source VM:
echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1233" > /sys/class/mdev_bus/mtty/mdev_supported_types/mtty-2/create echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1234" > /sys/class/mdev_bus/mtty/mdev_supported_types/mtty-2/create

for destination VM:
echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1235" > /sys/class/mdev_bus/mtty/mdev_supported_types/mtty-2/create echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1236" > /sys/class/mdev_bus/mtty/mdev_supported_types/mtty-2/create

Source qemu-cmdline:
/usr/libexec/qemu-kvm \
 -name guest=rhel75-mig,debug-threads=on \
 -machine pc-i440fx-3.1,accel=kvm,usb=off,dump-guest-core=off \
 -cpu SandyBridge,vme=on,hypervisor=on,arat=on,xsaveopt=on \
 -m 2048 -realtime mlock=off -smp 2,sockets=2,cores=1,threads=1 \
 -uuid eefb718c-137c-d416-e573-dd74ecd3490d \
-drive file=/home/vm/rhel-75.qcow2,format=qcow2,if=none,id=drive-ide0-0-0,cache=none \ -device ide-hd,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0,bootindex=1,write-cache=on \
 -vnc 127.0.0.1:0 \
-device rtl8139,netdev=net0,mac=52:54:b2:88:86:2a,bus=pci.0,addr=0x3 -netdev tap,id=net0,script=/root/qemu-ifup,downscript=no \ -device vfio-pci,sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1233 \ -device vfio-pci,sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1234 \
 --trace events=/root/vfio_events \
 -monitor unix:/tmp/qmp_socket1,server,nowait \
 -serial stdio \
 -msg timestamp=on

Destination qemu-cmdline:
/usr/libexec/qemu-kvm \
 -name guest=rhel75-mig,debug-threads=on \
 -machine pc-i440fx-3.1,accel=kvm,usb=off,dump-guest-core=off \
 -cpu SandyBridge,vme=on,hypervisor=on,arat=on,xsaveopt=on \
 -m 2048 -realtime mlock=off -smp 2,sockets=2,cores=1,threads=1 \
 -uuid eefb718c-137c-d416-e573-dd74ecd3490d \
-drive file=/home/vm/rhel-75.qcow2,format=qcow2,if=none,id=drive-ide0-0-0,cache=none \ -device ide-hd,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0,bootindex=1,write-cache=on \
 -vnc 127.0.0.1:1 \
-device rtl8139,netdev=net0,mac=52:54:b2:88:86:2a,bus=pci.0,addr=0x3 -netdev tap,id=net0,script=/root/qemu-ifup,downscript=no \ -device vfio-pci,sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1235 \ -device vfio-pci,sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1236 \
 -incoming unix:/tmp/mig_socket \
 --trace events=/root/vfio_events \
 -monitor unix:/tmp/qmp_socket2,server,nowait \
 -serial stdio \
 -msg timestamp=on

Migrate:
echo "migrate_set_speed 0" | sudo nc -U /tmp/qmp_socket1
echo "migrate -d unix:/tmp/mig_socket" | sudo nc -U $/tmp/qmp_socket1

After migration, 'qemu_loadvm_state_section_startfull' traces:

qemu_loadvm_state_section_startfull 0.000 pid=1457 section_id=0x2 idstr=b'ram' instance_id=0x0 version_id=0x4 qemu_loadvm_state_section_startfull 515.606 pid=1457 section_id=0x2e idstr=b'vfio' instance_id=0x0 version_id=0x1 qemu_loadvm_state_section_startfull 10.661 pid=1457 section_id=0x2f idstr=b'vfio' instance_id=0x1 version_id=0x1 qemu_loadvm_state_section_startfull 1120000.237 pid=1457 section_id=0x0 idstr=b'timer' instance_id=0x0 version_id=0x2 qemu_loadvm_state_section_startfull 9.058 pid=1457 section_id=0x4 idstr=b'cpu_common' instance_id=0x0 version_id=0x1 qemu_loadvm_state_section_startfull 26.453 pid=1457 section_id=0x5 idstr=b'cpu' instance_id=0x0 version_id=0xc qemu_loadvm_state_section_startfull 105.173 pid=1457 section_id=0x6 idstr=b'kvm-tpr-opt' instance_id=0x0 version_id=0x1 qemu_loadvm_state_section_startfull 940.028 pid=1457 section_id=0x7 idstr=b'apic' instance_id=0x0 version_id=0x3 qemu_loadvm_state_section_startfull 69.939 pid=1457 section_id=0x8 idstr=b'cpu_common' instance_id=0x1 version_id=0x1 qemu_loadvm_state_section_startfull 14.319 pid=1457 section_id=0x9 idstr=b'cpu' instance_id=0x1 version_id=0xc qemu_loadvm_state_section_startfull 102.986 pid=1457 section_id=0xa idstr=b'apic' instance_id=0x1 version_id=0x3 qemu_loadvm_state_section_startfull 107.910 pid=1457 section_id=0xb idstr=b'kvmclock' instance_id=0x0 version_id=0x1 qemu_loadvm_state_section_startfull 8.349 pid=1457 section_id=0xc idstr=b'fw_cfg' instance_id=0x0 version_id=0x2 qemu_loadvm_state_section_startfull 8.603 pid=1457 section_id=0xd idstr=b'PCIBUS' instance_id=0x0 version_id=0x1 qemu_loadvm_state_section_startfull 6.557 pid=1457 section_id=0xe idstr=b'0000:00:00.0/I440FX' instance_id=0x0 version_id=0x3 qemu_loadvm_state_section_startfull 633.727 pid=1457 section_id=0xf idstr=b'0000:00:01.0/PIIX3' instance_id=0x0 version_id=0x3 qemu_loadvm_state_section_startfull 14.907 pid=1457 section_id=0x10 idstr=b'i8259' instance_id=0x0 version_id=0x1 qemu_loadvm_state_section_startfull 11.465 pid=1457 section_id=0x11 idstr=b'i8259' instance_id=0x1 version_id=0x1 qemu_loadvm_state_section_startfull 5.663 pid=1457 section_id=0x12 idstr=b'ioapic' instance_id=0x0 version_id=0x3 qemu_loadvm_state_section_startfull 11.787 pid=1457 section_id=0x13 idstr=b'0000:00:02.0/vga' instance_id=0x0 version_id=0x2 qemu_loadvm_state_section_startfull 1718.618 pid=1457 section_id=0x14 idstr=b'hpet' instance_id=0x0 version_id=0x2 qemu_loadvm_state_section_startfull 16.212 pid=1457 section_id=0x15 idstr=b'mc146818rtc' instance_id=0x0 version_id=0x3 qemu_loadvm_state_section_startfull 9.946 pid=1457 section_id=0x16 idstr=b'i8254' instance_id=0x0 version_id=0x3 qemu_loadvm_state_section_startfull 12.879 pid=1457 section_id=0x17 idstr=b'pcspk' instance_id=0x0 version_id=0x1 qemu_loadvm_state_section_startfull 3.115 pid=1457 section_id=0x18 idstr=b'dma' instance_id=0x0 version_id=0x1 qemu_loadvm_state_section_startfull 10.432 pid=1457 section_id=0x19 idstr=b'dma' instance_id=0x1 version_id=0x1 qemu_loadvm_state_section_startfull 12.263 pid=1457 section_id=0x1a idstr=b'serial' instance_id=0x0 version_id=0x3 qemu_loadvm_state_section_startfull 7.299 pid=1457 section_id=0x1b idstr=b'parallel_isa' instance_id=0x0 version_id=0x1 qemu_loadvm_state_section_startfull 3.399 pid=1457 section_id=0x1c idstr=b'fdc' instance_id=0x0 version_id=0x2 qemu_loadvm_state_section_startfull 33.307 pid=1457 section_id=0x1d idstr=b'ps2kbd' instance_id=0x0 version_id=0x3 qemu_loadvm_state_section_startfull 6.961 pid=1457 section_id=0x1e idstr=b'ps2mouse' instance_id=0x0 version_id=0x2 qemu_loadvm_state_section_startfull 5.485 pid=1457 section_id=0x1f idstr=b'pckbd' instance_id=0x0 version_id=0x3 qemu_loadvm_state_section_startfull 3.984 pid=1457 section_id=0x20 idstr=b'vmmouse' instance_id=0x0 version_id=0x0 qemu_loadvm_state_section_startfull 105.948 pid=1457 section_id=0x21 idstr=b'port92' instance_id=0x0 version_id=0x1 qemu_loadvm_state_section_startfull 2.443 pid=1457 section_id=0x22 idstr=b'0000:00:01.1/ide' instance_id=0x0 version_id=0x3 qemu_loadvm_state_section_startfull 1094.861 pid=1457 section_id=0x23 idstr=b'i2c_bus' instance_id=0x0 version_id=0x1 qemu_loadvm_state_section_startfull 3.416 pid=1457 section_id=0x24 idstr=b'0000:00:01.3/piix4_pm' instance_id=0x0 version_id=0x3 qemu_loadvm_state_section_startfull 2266.518 pid=1457 section_id=0x2d idstr=b'0000:00:03.0/rtl8139' instance_id=0x0 version_id=0x5 qemu_loadvm_state_section_startfull 1619.840 pid=1457 section_id=0x30 idstr=b'acpi_build' instance_id=0x0 version_id=0x1 qemu_loadvm_state_section_startfull 4.200 pid=1457 section_id=0x31 idstr=b'globalstate' instance_id=0x0 version_id=0x1

Thanks,
Kirti

Reply via email to