From: Joao Martins <joao.m.mart...@oracle.com> Add device dirty page tracking start/stop functionality. This uses the device DMA logging uAPI to start and stop dirty page tracking by device.
Device dirty page tracking is used only if all devices within a container support device dirty page tracking. Signed-off-by: Joao Martins <joao.m.mart...@oracle.com> Signed-off-by: Avihai Horon <avih...@nvidia.com> --- include/hw/vfio/vfio-common.h | 2 + hw/vfio/common.c | 211 +++++++++++++++++++++++++++++++++- 2 files changed, 211 insertions(+), 2 deletions(-) diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index d54000d7ae..cde6ffb9d6 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -151,6 +151,8 @@ typedef struct VFIODevice { VFIOMigration *migration; Error *migration_blocker; OnOffAuto pre_copy_dirty_page_tracking; + bool dirty_pages_supported; + bool dirty_tracking; } VFIODevice; struct VFIODeviceOps { diff --git a/hw/vfio/common.c b/hw/vfio/common.c index fafc361cea..005c060c67 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -422,6 +422,22 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) return true; } +static bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container) +{ + VFIOGroup *group; + VFIODevice *vbasedev; + + QLIST_FOREACH(group, &container->group_list, container_next) { + QLIST_FOREACH(vbasedev, &group->device_list, next) { + if (!vbasedev->dirty_pages_supported) { + return false; + } + } + } + + return true; +} + /* * Check if all VFIO devices are running and migration is active, which is * essentially equivalent to the migration being in pre-copy phase. @@ -1355,13 +1371,192 @@ static int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start) return ret; } +static int vfio_devices_dma_logging_set(VFIOContainer *container, + struct vfio_device_feature *feature) +{ + bool status = (feature->flags & VFIO_DEVICE_FEATURE_MASK) == + VFIO_DEVICE_FEATURE_DMA_LOGGING_START; + VFIODevice *vbasedev; + VFIOGroup *group; + int ret = 0; + + QLIST_FOREACH(group, &container->group_list, container_next) { + QLIST_FOREACH(vbasedev, &group->device_list, next) { + if (vbasedev->dirty_tracking == status) { + continue; + } + + ret = ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature); + if (ret) { + ret = -errno; + error_report("%s: Failed to set DMA logging %s, err %d (%s)", + vbasedev->name, status ? "start" : "stop", ret, + strerror(errno)); + goto out; + } + vbasedev->dirty_tracking = status; + } + } + +out: + return ret; +} + +static int vfio_devices_dma_logging_stop(VFIOContainer *container) +{ + uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature), + sizeof(uint64_t))] = {}; + struct vfio_device_feature *feature = (struct vfio_device_feature *)buf; + + feature->argsz = sizeof(buf); + feature->flags = VFIO_DEVICE_FEATURE_SET; + feature->flags |= VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP; + + return vfio_devices_dma_logging_set(container, feature); +} + +static gboolean vfio_device_dma_logging_range_add(DMAMap *map, gpointer data) +{ + struct vfio_device_feature_dma_logging_range **out = data; + struct vfio_device_feature_dma_logging_range *range = *out; + + range->iova = map->iova; + /* IOVATree is inclusive, DMA logging uAPI isn't, so add 1 to length */ + range->length = map->size + 1; + + *out = ++range; + + return false; +} + +static gboolean vfio_iova_tree_get_first(DMAMap *map, gpointer data) +{ + DMAMap *first = data; + + first->iova = map->iova; + first->size = map->size; + + return true; +} + +static gboolean vfio_iova_tree_get_last(DMAMap *map, gpointer data) +{ + DMAMap *last = data; + + last->iova = map->iova; + last->size = map->size; + + return false; +} + +static struct vfio_device_feature * +vfio_device_feature_dma_logging_start_create(VFIOContainer *container) +{ + struct vfio_device_feature *feature; + size_t feature_size; + struct vfio_device_feature_dma_logging_control *control; + struct vfio_device_feature_dma_logging_range *ranges; + unsigned int max_ranges; + unsigned int cur_ranges; + + feature_size = sizeof(struct vfio_device_feature) + + sizeof(struct vfio_device_feature_dma_logging_control); + feature = g_malloc0(feature_size); + feature->argsz = feature_size; + feature->flags = VFIO_DEVICE_FEATURE_SET; + feature->flags |= VFIO_DEVICE_FEATURE_DMA_LOGGING_START; + + control = (struct vfio_device_feature_dma_logging_control *)feature->data; + control->page_size = qemu_real_host_page_size(); + + QEMU_LOCK_GUARD(&container->mappings_mutex); + + /* + * DMA logging uAPI guarantees to support at least num_ranges that fits into + * a single host kernel page. To be on the safe side, use this as a limit + * from which to merge to a single range. + */ + max_ranges = qemu_real_host_page_size() / sizeof(*ranges); + cur_ranges = iova_tree_nnodes(container->mappings); + control->num_ranges = (cur_ranges <= max_ranges) ? cur_ranges : 1; + ranges = g_try_new0(struct vfio_device_feature_dma_logging_range, + control->num_ranges); + if (!ranges) { + g_free(feature); + errno = ENOMEM; + + return NULL; + } + + control->ranges = (uint64_t)ranges; + if (cur_ranges <= max_ranges) { + iova_tree_foreach(container->mappings, + vfio_device_dma_logging_range_add, &ranges); + } else { + DMAMap first, last; + + iova_tree_foreach(container->mappings, vfio_iova_tree_get_first, + &first); + iova_tree_foreach(container->mappings, vfio_iova_tree_get_last, &last); + ranges->iova = first.iova; + /* IOVATree is inclusive, DMA logging uAPI isn't, so add 1 to length */ + ranges->length = (last.iova - first.iova) + last.size + 1; + } + + return feature; +} + +static void vfio_device_feature_dma_logging_start_destroy( + struct vfio_device_feature *feature) +{ + struct vfio_device_feature_dma_logging_control *control = + (struct vfio_device_feature_dma_logging_control *)feature->data; + struct vfio_device_feature_dma_logging_range *ranges = + (struct vfio_device_feature_dma_logging_range *)control->ranges; + + g_free(ranges); + g_free(feature); +} + +static int vfio_devices_dma_logging_start(VFIOContainer *container) +{ + struct vfio_device_feature *feature; + int ret; + + feature = vfio_device_feature_dma_logging_start_create(container); + if (!feature) { + return -errno; + } + + ret = vfio_devices_dma_logging_set(container, feature); + if (ret) { + vfio_devices_dma_logging_stop(container); + } + + vfio_device_feature_dma_logging_start_destroy(feature); + + return ret; +} + static void vfio_listener_log_global_start(MemoryListener *listener) { VFIOContainer *container = container_of(listener, VFIOContainer, listener); int ret; - ret = vfio_set_dirty_page_tracking(container, true); + if (vfio_devices_all_device_dirty_tracking(container)) { + if (vfio_have_giommu(container)) { + /* Device dirty page tracking currently doesn't support vIOMMU */ + return; + } + + ret = vfio_devices_dma_logging_start(container); + } else { + ret = vfio_set_dirty_page_tracking(container, true); + } + if (ret) { + error_report("vfio: Could not start dirty page tracking, err: %d (%s)", + ret, strerror(-ret)); vfio_set_migration_error(ret); } } @@ -1371,8 +1566,20 @@ static void vfio_listener_log_global_stop(MemoryListener *listener) VFIOContainer *container = container_of(listener, VFIOContainer, listener); int ret; - ret = vfio_set_dirty_page_tracking(container, false); + if (vfio_devices_all_device_dirty_tracking(container)) { + if (vfio_have_giommu(container)) { + /* Device dirty page tracking currently doesn't support vIOMMU */ + return; + } + + ret = vfio_devices_dma_logging_stop(container); + } else { + ret = vfio_set_dirty_page_tracking(container, false); + } + if (ret) { + error_report("vfio: Could not stop dirty page tracking, err: %d (%s)", + ret, strerror(-ret)); vfio_set_migration_error(ret); } } -- 2.26.3