For more information, see the commit messages of the added patches and the upstream discussion: https://lore.kernel.org/qemu-devel/[email protected]/
Signed-off-by: Fiona Ebner <[email protected]> --- ...-rename-field-to-num_initial_regions.patch | 245 ++++++++++++++++++ ...region-info-cache-for-initial-region.patch | 75 ++++++ debian/patches/series | 2 + 3 files changed, 322 insertions(+) create mode 100644 debian/patches/extra/0006-vfio-rename-field-to-num_initial_regions.patch create mode 100644 debian/patches/extra/0007-vfio-only-check-region-info-cache-for-initial-region.patch diff --git a/debian/patches/extra/0006-vfio-rename-field-to-num_initial_regions.patch b/debian/patches/extra/0006-vfio-rename-field-to-num_initial_regions.patch new file mode 100644 index 0000000..3662f1d --- /dev/null +++ b/debian/patches/extra/0006-vfio-rename-field-to-num_initial_regions.patch @@ -0,0 +1,245 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: John Levon <[email protected]> +Date: Tue, 14 Oct 2025 17:12:26 +0200 +Subject: [PATCH] vfio: rename field to "num_initial_regions" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +We set VFIODevice::num_regions at initialization time, and do not +otherwise refresh it. As it is valid in theory for a VFIO device to +later increase the number of supported regions, rename the field to +"num_initial_regions" to better reflect its semantics. 
+ +Signed-off-by: John Levon <[email protected]> +Reviewed-by: Cédric Le Goater <[email protected]> +Reviewed-by: Alex Williamson <[email protected]> +Link: https://lore.kernel.org/qemu-devel/[email protected] +Signed-off-by: Cédric Le Goater <[email protected]> +(cherry picked from commit d5176a39405f0e0d20dff173e58255a7d5099411 + from https://gitlab.com/legoater/qemu/-/tree/vfio-next) +[FE: also rename in hw/vfio/platform.c and hw/core/sysbus-fdt.c + where affected code got dropped in master, but is still in v10.1] +Signed-off-by: Fiona Ebner <[email protected]> +--- + hw/core/sysbus-fdt.c | 14 +++++++------- + hw/vfio-user/device.c | 2 +- + hw/vfio/ccw.c | 4 ++-- + hw/vfio/device.c | 12 ++++++------ + hw/vfio/iommufd.c | 3 ++- + hw/vfio/pci.c | 4 ++-- + hw/vfio/platform.c | 10 +++++----- + include/hw/vfio/vfio-device.h | 2 +- + 8 files changed, 26 insertions(+), 25 deletions(-) + +diff --git a/hw/core/sysbus-fdt.c b/hw/core/sysbus-fdt.c +index c339a27875..1e1966813f 100644 +--- a/hw/core/sysbus-fdt.c ++++ b/hw/core/sysbus-fdt.c +@@ -236,15 +236,15 @@ static int add_calxeda_midway_xgmac_fdt_node(SysBusDevice *sbdev, void *opaque) + + qemu_fdt_setprop(fdt, nodename, "dma-coherent", "", 0); + +- reg_attr = g_new(uint32_t, vbasedev->num_regions * 2); +- for (i = 0; i < vbasedev->num_regions; i++) { ++ reg_attr = g_new(uint32_t, vbasedev->num_initial_regions * 2); ++ for (i = 0; i < vbasedev->num_initial_regions; i++) { + mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, i); + reg_attr[2 * i] = cpu_to_be32(mmio_base); + reg_attr[2 * i + 1] = cpu_to_be32( + memory_region_size(vdev->regions[i]->mem)); + } + qemu_fdt_setprop(fdt, nodename, "reg", reg_attr, +- vbasedev->num_regions * 2 * sizeof(uint32_t)); ++ vbasedev->num_initial_regions * 2 * sizeof(uint32_t)); + + irq_attr = g_new(uint32_t, vbasedev->num_irqs * 3); + for (i = 0; i < vbasedev->num_irqs; i++) { +@@ -330,7 +330,7 @@ static int add_amd_xgbe_fdt_node(SysBusDevice *sbdev, void *opaque) + + 
g_free(dt_name); + +- if (vbasedev->num_regions != 5) { ++ if (vbasedev->num_initial_regions != 5) { + error_report("%s Does the host dt node combine XGBE/PHY?", __func__); + exit(1); + } +@@ -374,15 +374,15 @@ static int add_amd_xgbe_fdt_node(SysBusDevice *sbdev, void *opaque) + guest_clock_phandles[0], + guest_clock_phandles[1]); + +- reg_attr = g_new(uint32_t, vbasedev->num_regions * 2); +- for (i = 0; i < vbasedev->num_regions; i++) { ++ reg_attr = g_new(uint32_t, vbasedev->num_initial_regions * 2); ++ for (i = 0; i < vbasedev->num_initial_regions; i++) { + mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, i); + reg_attr[2 * i] = cpu_to_be32(mmio_base); + reg_attr[2 * i + 1] = cpu_to_be32( + memory_region_size(vdev->regions[i]->mem)); + } + qemu_fdt_setprop(guest_fdt, nodename, "reg", reg_attr, +- vbasedev->num_regions * 2 * sizeof(uint32_t)); ++ vbasedev->num_initial_regions * 2 * sizeof(uint32_t)); + + irq_attr = g_new(uint32_t, vbasedev->num_irqs * 3); + for (i = 0; i < vbasedev->num_irqs; i++) { +diff --git a/hw/vfio-user/device.c b/hw/vfio-user/device.c +index 0609a7dc25..64ef35b320 100644 +--- a/hw/vfio-user/device.c ++++ b/hw/vfio-user/device.c +@@ -134,7 +134,7 @@ static int vfio_user_device_io_get_region_info(VFIODevice *vbasedev, + VFIOUserFDs fds = { 0, 1, fd}; + int ret; + +- if (info->index > vbasedev->num_regions) { ++ if (info->index > vbasedev->num_initial_regions) { + return -EINVAL; + } + +diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c +index 9560b8d851..4d9588e7aa 100644 +--- a/hw/vfio/ccw.c ++++ b/hw/vfio/ccw.c +@@ -484,9 +484,9 @@ static bool vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) + * We always expect at least the I/O region to be present. We also + * may have a variable number of regions governed by capabilities. 
+ */ +- if (vdev->num_regions < VFIO_CCW_CONFIG_REGION_INDEX + 1) { ++ if (vdev->num_initial_regions < VFIO_CCW_CONFIG_REGION_INDEX + 1) { + error_setg(errp, "vfio: too few regions (%u), expected at least %u", +- vdev->num_regions, VFIO_CCW_CONFIG_REGION_INDEX + 1); ++ vdev->num_initial_regions, VFIO_CCW_CONFIG_REGION_INDEX + 1); + return false; + } + +diff --git a/hw/vfio/device.c b/hw/vfio/device.c +index 52a1996dc4..0b459c0f7c 100644 +--- a/hw/vfio/device.c ++++ b/hw/vfio/device.c +@@ -257,7 +257,7 @@ int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type, + { + int i; + +- for (i = 0; i < vbasedev->num_regions; i++) { ++ for (i = 0; i < vbasedev->num_initial_regions; i++) { + struct vfio_info_cap_header *hdr; + struct vfio_region_info_cap_type *cap_type; + +@@ -466,7 +466,7 @@ void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer, + int i; + + vbasedev->num_irqs = info->num_irqs; +- vbasedev->num_regions = info->num_regions; ++ vbasedev->num_initial_regions = info->num_regions; + vbasedev->flags = info->flags; + vbasedev->reset_works = !!(info->flags & VFIO_DEVICE_FLAGS_RESET); + +@@ -476,10 +476,10 @@ void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer, + QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next); + + vbasedev->reginfo = g_new0(struct vfio_region_info *, +- vbasedev->num_regions); ++ vbasedev->num_initial_regions); + if (vbasedev->use_region_fds) { +- vbasedev->region_fds = g_new0(int, vbasedev->num_regions); +- for (i = 0; i < vbasedev->num_regions; i++) { ++ vbasedev->region_fds = g_new0(int, vbasedev->num_initial_regions); ++ for (i = 0; i < vbasedev->num_initial_regions; i++) { + vbasedev->region_fds[i] = -1; + } + } +@@ -489,7 +489,7 @@ void vfio_device_unprepare(VFIODevice *vbasedev) + { + int i; + +- for (i = 0; i < vbasedev->num_regions; i++) { ++ for (i = 0; i < vbasedev->num_initial_regions; i++) { + g_free(vbasedev->reginfo[i]); + if (vbasedev->region_fds != NULL && 
vbasedev->region_fds[i] != -1) { + close(vbasedev->region_fds[i]); +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index 48c590b6a9..dbcd861b27 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -668,7 +668,8 @@ found_container: + vfio_iommufd_cpr_register_device(vbasedev); + + trace_iommufd_cdev_device_info(vbasedev->name, devfd, vbasedev->num_irqs, +- vbasedev->num_regions, vbasedev->flags); ++ vbasedev->num_initial_regions, ++ vbasedev->flags); + return true; + + err_listener_register: +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 07257d0fa0..1e69055c7c 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2930,9 +2930,9 @@ bool vfio_pci_populate_device(VFIOPCIDevice *vdev, Error **errp) + return false; + } + +- if (vbasedev->num_regions < VFIO_PCI_CONFIG_REGION_INDEX + 1) { ++ if (vbasedev->num_initial_regions < VFIO_PCI_CONFIG_REGION_INDEX + 1) { + error_setg(errp, "unexpected number of io regions %u", +- vbasedev->num_regions); ++ vbasedev->num_initial_regions); + return false; + } + +diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c +index 5c1795a26f..c9349ba7b7 100644 +--- a/hw/vfio/platform.c ++++ b/hw/vfio/platform.c +@@ -148,7 +148,7 @@ static void vfio_mmap_set_enabled(VFIOPlatformDevice *vdev, bool enabled) + { + int i; + +- for (i = 0; i < vdev->vbasedev.num_regions; i++) { ++ for (i = 0; i < vdev->vbasedev.num_initial_regions; i++) { + vfio_region_mmaps_set_enabled(vdev->regions[i], enabled); + } + } +@@ -453,9 +453,9 @@ static bool vfio_populate_device(VFIODevice *vbasedev, Error **errp) + return false; + } + +- vdev->regions = g_new0(VFIORegion *, vbasedev->num_regions); ++ vdev->regions = g_new0(VFIORegion *, vbasedev->num_initial_regions); + +- for (i = 0; i < vbasedev->num_regions; i++) { ++ for (i = 0; i < vbasedev->num_initial_regions; i++) { + char *name = g_strdup_printf("VFIO %s region %d\n", vbasedev->name, i); + + vdev->regions[i] = g_new0(VFIORegion, 1); +@@ -499,7 +499,7 @@ irq_err: + g_free(intp); + } + 
reg_error: +- for (i = 0; i < vbasedev->num_regions; i++) { ++ for (i = 0; i < vbasedev->num_initial_regions; i++) { + if (vdev->regions[i]) { + vfio_region_finalize(vdev->regions[i]); + } +@@ -608,7 +608,7 @@ static void vfio_platform_realize(DeviceState *dev, Error **errp) + } + } + +- for (i = 0; i < vbasedev->num_regions; i++) { ++ for (i = 0; i < vbasedev->num_initial_regions; i++) { + if (vfio_region_mmap(vdev->regions[i])) { + warn_report("%s mmap unsupported, performance may be slow", + memory_region_name(vdev->regions[i]->mem)); +diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h +index 6e4d5ccdac..10024730a1 100644 +--- a/include/hw/vfio/vfio-device.h ++++ b/include/hw/vfio/vfio-device.h +@@ -74,7 +74,7 @@ typedef struct VFIODevice { + VFIODeviceOps *ops; + VFIODeviceIOOps *io_ops; + unsigned int num_irqs; +- unsigned int num_regions; ++ unsigned int num_initial_regions; + unsigned int flags; + VFIOMigration *migration; + Error *migration_blocker; diff --git a/debian/patches/extra/0007-vfio-only-check-region-info-cache-for-initial-region.patch b/debian/patches/extra/0007-vfio-only-check-region-info-cache-for-initial-region.patch new file mode 100644 index 0000000..b239cb4 --- /dev/null +++ b/debian/patches/extra/0007-vfio-only-check-region-info-cache-for-initial-region.patch @@ -0,0 +1,75 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: John Levon <[email protected]> +Date: Tue, 14 Oct 2025 17:12:27 +0200 +Subject: [PATCH] vfio: only check region info cache for initial regions +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +It is semantically valid for a VFIO device to increase the number of +regions after initialization. In this case, we'd attempt to check for +cached region info past the size of the ->reginfo array. Check for the +region index and skip the cache in these cases. 
+ +This also works around some VGPU use cases which appear to be a bug, +where VFIO_DEVICE_QUERY_GFX_PLANE returns a region index beyond the +reported ->num_regions. + +Fixes: 95cdb024 ("vfio: add region info cache") +Signed-off-by: John Levon <[email protected]> +Reviewed-by: Cédric Le Goater <[email protected]> +Reviewed-by: Alex Williamson <[email protected]> +Link: https://lore.kernel.org/qemu-devel/[email protected] +Signed-off-by: Cédric Le Goater <[email protected]> +(cherry picked from commit 5bdcf2df64bf7e4be58524ef1442836b6d41282e + from https://gitlab.com/legoater/qemu/-/tree/vfio-next) +Signed-off-by: Fiona Ebner <[email protected]> +--- + hw/vfio/device.c | 27 +++++++++++++++++++-------- + 1 file changed, 19 insertions(+), 8 deletions(-) + +diff --git a/hw/vfio/device.c b/hw/vfio/device.c +index 0b459c0f7c..7ebf41c95e 100644 +--- a/hw/vfio/device.c ++++ b/hw/vfio/device.c +@@ -205,10 +205,19 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index, + int fd = -1; + int ret; + +- /* check cache */ +- if (vbasedev->reginfo[index] != NULL) { +- *info = vbasedev->reginfo[index]; +- return 0; ++ /* ++ * We only set up the region info cache for the initial number of regions. ++ * ++ * Since a VFIO device may later increase the number of regions then use ++ * such regions with an index past ->num_initial_regions, don't attempt to ++ * use the info cache in those cases. 
++ */ ++ if (index < vbasedev->num_initial_regions) { ++ /* check cache */ ++ if (vbasedev->reginfo[index] != NULL) { ++ *info = vbasedev->reginfo[index]; ++ return 0; ++ } + } + + *info = g_malloc0(argsz); +@@ -236,10 +245,12 @@ retry: + goto retry; + } + +- /* fill cache */ +- vbasedev->reginfo[index] = *info; +- if (vbasedev->region_fds != NULL) { +- vbasedev->region_fds[index] = fd; ++ if (index < vbasedev->num_initial_regions) { ++ /* fill cache */ ++ vbasedev->reginfo[index] = *info; ++ if (vbasedev->region_fds != NULL) { ++ vbasedev->region_fds[index] = fd; ++ } + } + + return 0; diff --git a/debian/patches/series b/debian/patches/series index 29c18ec..900310a 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -3,6 +3,8 @@ extra/0002-ide-avoid-potential-deadlock-when-draining-during-tr.patch extra/0003-tcg-arm-Fix-tgen_deposit.patch extra/0004-vfio-igd-Enable-quirks-when-IGD-is-not-the-primary-d.patch extra/0005-hw-scsi-avoid-deadlock-upon-TMF-request-cancelling-w.patch +extra/0006-vfio-rename-field-to-num_initial_regions.patch +extra/0007-vfio-only-check-region-info-cache-for-initial-region.patch bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch -- 2.47.3 _______________________________________________ pve-devel mailing list [email protected] https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
