At vfio creation time, save the value of vfio container, group, and device descriptors in CPR state. On qemu restart, vfio_realize() finds and uses the saved descriptors.
During reuse, device and iommu state is already configured, so operations in vfio_realize that would modify the configuration, such as vfio ioctl's, are skipped. The result is that vfio_realize constructs qemu data structures that reflect the current state of the device. Signed-off-by: Steve Sistare <steven.sist...@oracle.com> --- include/hw/vfio/vfio-cpr.h | 6 +++++ hw/vfio/container.c | 67 +++++++++++++++++++++++++++++++++++----------- hw/vfio/cpr-legacy.c | 42 +++++++++++++++++++++++++++++ 3 files changed, 100 insertions(+), 15 deletions(-) diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h index d4e0bd5..5a2e5f6 100644 --- a/include/hw/vfio/vfio-cpr.h +++ b/include/hw/vfio/vfio-cpr.h @@ -13,6 +13,7 @@ struct VFIOContainer; struct VFIOContainerBase; +struct VFIOGroup; typedef struct VFIOContainerCPR { Error *blocker; @@ -30,4 +31,9 @@ bool vfio_cpr_register_container(struct VFIOContainerBase *bcontainer, Error **errp); void vfio_cpr_unregister_container(struct VFIOContainerBase *bcontainer); +int vfio_cpr_group_get_device_fd(int d, const char *name); + +bool vfio_cpr_container_match(struct VFIOContainer *container, + struct VFIOGroup *group, int fd); + #endif /* HW_VFIO_VFIO_CPR_H */ diff --git a/hw/vfio/container.c b/hw/vfio/container.c index 7d2035c..798abda 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -31,6 +31,8 @@ #include "system/reset.h" #include "trace.h" #include "qapi/error.h" +#include "migration/cpr.h" +#include "migration/blocker.h" #include "pci.h" #include "hw/vfio/vfio-container.h" #include "hw/vfio/vfio-cpr.h" @@ -426,7 +428,12 @@ static VFIOContainer *vfio_create_container(int fd, VFIOGroup *group, return NULL; } - if (!vfio_set_iommu(fd, group->fd, &iommu_type, errp)) { + /* + * During CPR, just set the container type and skip the ioctls, as the + * container and group are already configured in the kernel. + */ + if (!cpr_is_incoming() && + !vfio_set_iommu(fd, group->fd, &iommu_type, errp)) { return NULL; } @@ -593,6 +600,11 @@ static bool vfio_container_group_add(VFIOContainer *container, VFIOGroup *group, group->container = container; QLIST_INSERT_HEAD(&container->group_list, group, container_next); vfio_group_add_kvm_device(group); + /* + * Remember the container fd for each group, so we can attach to the same + * container after CPR. + */ + cpr_resave_fd("vfio_container_for_group", group->groupid, container->fd); return true; } @@ -602,6 +614,7 @@ static void vfio_container_group_del(VFIOContainer *container, VFIOGroup *group) group->container = NULL; vfio_group_del_kvm_device(group); vfio_ram_block_discard_disable(container, false); + cpr_delete_fd("vfio_container_for_group", group->groupid); } static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as, @@ -616,17 +629,34 @@ static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as, bool group_was_added = false; space = vfio_address_space_get(as); + fd = cpr_find_fd("vfio_container_for_group", group->groupid); - QLIST_FOREACH(bcontainer, &space->containers, next) { - container = container_of(bcontainer, VFIOContainer, bcontainer); - if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { - return vfio_container_group_add(container, group, errp); + if (!cpr_is_incoming()) { + QLIST_FOREACH(bcontainer, &space->containers, next) { + container = container_of(bcontainer, VFIOContainer, bcontainer); + if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { + return vfio_container_group_add(container, group, errp); + } } - } - fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp); - if (fd < 0) { - goto fail; + fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp); + if (fd < 0) { + goto fail; + } + } else { + /* + * For incoming CPR, the group is already attached in the kernel. + * If a container with matching fd is found, then update the + * userland group list and return. If not, then after the loop, + * create the container struct and group list. + */ + QLIST_FOREACH(bcontainer, &space->containers, next) { + container = container_of(bcontainer, VFIOContainer, bcontainer); + + if (vfio_cpr_container_match(container, group, fd)) { + return vfio_container_group_add(container, group, errp); + } + } } ret = ioctl(fd, VFIO_GET_API_VERSION); @@ -698,6 +728,7 @@ static void vfio_container_disconnect(VFIOGroup *group) QLIST_REMOVE(group, container_next); group->container = NULL; + cpr_delete_fd("vfio_container_for_group", group->groupid); /* * Explicitly release the listener first before unset container, @@ -751,7 +782,7 @@ static VFIOGroup *vfio_group_get(int groupid, AddressSpace *as, Error **errp) group = g_malloc0(sizeof(*group)); snprintf(path, sizeof(path), "/dev/vfio/%d", groupid); - group->fd = qemu_open(path, O_RDWR, errp); + group->fd = cpr_open_fd(path, O_RDWR, "vfio_group", groupid, errp); if (group->fd < 0) { goto free_group_exit; } @@ -783,6 +814,7 @@ static VFIOGroup *vfio_group_get(int groupid, AddressSpace *as, Error **errp) return group; close_fd_exit: + cpr_delete_fd("vfio_group", groupid); close(group->fd); free_group_exit: @@ -804,6 +836,7 @@ static void vfio_group_put(VFIOGroup *group) vfio_container_disconnect(group); QLIST_REMOVE(group, next); trace_vfio_group_put(group->fd); + cpr_delete_fd("vfio_group", group->groupid); close(group->fd); g_free(group); } @@ -814,7 +847,7 @@ static bool vfio_device_get(VFIOGroup *group, const char *name, g_autofree struct vfio_device_info *info = NULL; int fd; - fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name); + fd = vfio_cpr_group_get_device_fd(group->fd, name); if (fd < 0) { error_setg_errno(errp, errno, "error getting device from group %d", group->groupid); @@ -827,8 +860,7 @@ static bool vfio_device_get(VFIOGroup *group, const char *name, info = vfio_get_device_info(fd); if (!info) { error_setg_errno(errp, errno, "error getting device info"); - close(fd); - return false; + goto fail; } /* @@ -842,8 +874,7 @@ static bool vfio_device_get(VFIOGroup *group, const char *name, if (!QLIST_EMPTY(&group->device_list)) { error_setg(errp, "Inconsistent setting of support for discarding " "RAM (e.g., balloon) within group"); - close(fd); - return false; + goto fail; } if (!group->ram_block_discard_allowed) { @@ -861,6 +892,11 @@ static bool vfio_device_get(VFIOGroup *group, const char *name, trace_vfio_device_get(name, info->flags, info->num_regions, info->num_irqs); return true; + +fail: + close(fd); + cpr_delete_fd(name, 0); + return false; } static void vfio_device_put(VFIODevice *vbasedev) @@ -871,6 +907,7 @@ static void vfio_device_put(VFIODevice *vbasedev) QLIST_REMOVE(vbasedev, next); vbasedev->group = NULL; trace_vfio_device_put(vbasedev->fd); + cpr_delete_fd(vbasedev->name, 0); close(vbasedev->fd); } diff --git a/hw/vfio/cpr-legacy.c b/hw/vfio/cpr-legacy.c index 419b9fb..29be64f 100644 --- a/hw/vfio/cpr-legacy.c +++ b/hw/vfio/cpr-legacy.c @@ -9,6 +9,7 @@ #include "qemu/osdep.h" #include "hw/vfio/vfio-container.h" #include "hw/vfio/vfio-cpr.h" +#include "hw/vfio/vfio-device.h" #include "migration/blocker.h" #include "migration/cpr.h" #include "migration/migration.h" @@ -67,3 +68,44 @@ void vfio_legacy_cpr_unregister_container(VFIOContainer *container) migrate_del_blocker(&container->cpr.blocker); vmstate_unregister(NULL, &vfio_container_vmstate, container); } + +int vfio_cpr_group_get_device_fd(int d, const char *name) +{ + const int id = 0; + int fd = cpr_find_fd(name, id); + + if (fd < 0) { + fd = ioctl(d, VFIO_GROUP_GET_DEVICE_FD, name); + if (fd >= 0) { + cpr_save_fd(name, id, fd); + } + } + return fd; +} + +static bool same_device(int fd1, int fd2) +{ + struct stat st1, st2; + + return !fstat(fd1, &st1) && !fstat(fd2, &st2) && st1.st_dev == st2.st_dev; +} + +bool vfio_cpr_container_match(VFIOContainer *container, VFIOGroup *group, + int fd) +{ + if (container->fd == fd) { + return true; + } + if (!same_device(container->fd, fd)) { + return false; + } + /* + * Same device, different fd. This occurs when the container fd is + * cpr_save'd multiple times, once for each groupid, so SCM_RIGHTS + * produces duplicates. De-dup it. + */ + cpr_delete_fd("vfio_container_for_group", group->groupid); + close(fd); + cpr_save_fd("vfio_container_for_group", group->groupid, container->fd); + return true; +} -- 1.8.3.1