Hello John,

On 5/9/25 15:13, Cédric Le Goater wrote:
From: John Levon <[email protected]>

Instead of requesting region information on demand with
VFIO_DEVICE_GET_REGION_INFO, maintain a cache: this will become
necessary for performance for vfio-user, where this call becomes a
message over the control socket, so is of higher overhead than the
traditional path.

We will also need it to generalize region accesses, as that means we
can't use ->config_offset for configuration space accesses, but must
look up the region offset (if relevant) each time.

Originally-by: John Johnson <[email protected]>
Signed-off-by: Elena Ufimtseva <[email protected]>
Signed-off-by: Jagannathan Raman <[email protected]>
Signed-off-by: John Levon <[email protected]>
Reviewed-by: Cédric Le Goater <[email protected]>
Link: https://lore.kernel.org/qemu-devel/[email protected]
Signed-off-by: Cédric Le Goater <[email protected]>
---
  include/hw/vfio/vfio-device.h |  1 +
  hw/vfio/ccw.c                 |  5 -----
  hw/vfio/device.c              | 25 +++++++++++++++++++++----
  hw/vfio/igd.c                 | 10 +++++-----
  hw/vfio/pci.c                 |  6 +++---
  hw/vfio/region.c              |  2 +-
  6 files changed, 31 insertions(+), 18 deletions(-)

diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
index 
7e1e81e76bd2b484108282f7147ff7a0793360cf..4fff3dcee3b4a2a3c8a4bd6fa017cf822a16668e
 100644
--- a/include/hw/vfio/vfio-device.h
+++ b/include/hw/vfio/vfio-device.h
@@ -83,6 +83,7 @@ typedef struct VFIODevice {
      IOMMUFDBackend *iommufd;
      VFIOIOASHwpt *hwpt;
      QLIST_ENTRY(VFIODevice) hwpt_next;
+    struct vfio_region_info **reginfo;
  } VFIODevice;
struct VFIODeviceOps {
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index 
ab3fabf991c35abb6c40d925f526c2a2d8955aef..cea9d6e00562c241832184cafe862005fcdc992b
 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -504,7 +504,6 @@ static bool vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error 
**errp)
vcdev->io_region_offset = info->offset;
      vcdev->io_region = g_malloc0(info->size);
-    g_free(info);
/* check for the optional async command region */
      ret = vfio_device_get_region_info_type(vdev, VFIO_REGION_TYPE_CCW,
@@ -517,7 +516,6 @@ static bool vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error 
**errp)
          }
          vcdev->async_cmd_region_offset = info->offset;
          vcdev->async_cmd_region = g_malloc0(info->size);
-        g_free(info);
      }
ret = vfio_device_get_region_info_type(vdev, VFIO_REGION_TYPE_CCW,
@@ -530,7 +528,6 @@ static bool vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error 
**errp)
          }
          vcdev->schib_region_offset = info->offset;
          vcdev->schib_region = g_malloc(info->size);
-        g_free(info);
      }
ret = vfio_device_get_region_info_type(vdev, VFIO_REGION_TYPE_CCW,
@@ -544,7 +541,6 @@ static bool vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error 
**errp)
          }
          vcdev->crw_region_offset = info->offset;
          vcdev->crw_region = g_malloc(info->size);
-        g_free(info);
      }
return true;
@@ -554,7 +550,6 @@ out_err:
      g_free(vcdev->schib_region);
      g_free(vcdev->async_cmd_region);
      g_free(vcdev->io_region);
-    g_free(info);
      return false;
  }
diff --git a/hw/vfio/device.c b/hw/vfio/device.c
index 
40a196bfb9a86656610863d4f064db383c7072e0..77b0675abe7b4f4155ea79cf3fd0e3ab4b21f8ea
 100644
--- a/hw/vfio/device.c
+++ b/hw/vfio/device.c
@@ -202,6 +202,12 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int 
index,
      size_t argsz = sizeof(struct vfio_region_info);
      int ret;
+ /* check cache */

It would be good to add an assert to check the index value. More importantly,
we need to fix an ugly "index out-of-bounds" bug that can occur when booting
a VM with a vGPU:

  -device vfio-pci-nohotplug,host=0000:27:00.4,display=on,ramfb=true ...

The interesting part is :

  Thread 1 (Thread 0x7ffff6891ec0 (LWP 11372) "qemu-kvm"):
  #0  0x000055555581b83d in vfio_region_setup (obj=0x5555588c0b70, vbasedev=0x5555588c1630, 
region=0x555558a9c040, index=9, name=0x555555de94ba <str.68.llvm> "display") at 
../hw/vfio/region.c:199
  #1  0x00005555558208a4 in vfio_display_region_update (opaque=<optimized out>) 
at ../hw/vfio/display.c:449
  #2  0x00005555556bdd6c in graphic_hw_update (con=0x555558acf830) at 
../ui/console.c:143
  #3  vnc_refresh (dcl=0x7fffec048050) at ../ui/vnc.c:3262
  #4  0x00005555556a15cb in dpy_refresh (s=0x555558acf980) at 
../ui/console.c:880
  #5  gui_update (opaque=0x555558acf980) at ../ui/console.c:90
  (gdb) p vbasedev->num_regions
  $9 = 9

With num_regions = 9, the valid indexes of the reginfo array are 0..8, so
index 9 is beyond the maximum valid index :/

We didn't take into account the VFIO_DEVICE_QUERY_GFX_PLANE ioctl, which
can return a region index (9 in this case) that is beyond the maximum
valid index of the reginfo array :/

I didn't expect that. Sometimes the best way to learn how something
works is to break it ... Ideas welcome.

Thanks,

C.



+    if (vbasedev->reginfo[index] != NULL) {
+        *info = vbasedev->reginfo[index];
+        return 0;
+    }
+
      *info = g_malloc0(argsz);
(*info)->index = index;
@@ -222,6 +228,9 @@ retry:
          goto retry;
      }
+ /* fill cache */
+    vbasedev->reginfo[index] = *info;
+
      return 0;
  }
@@ -240,7 +249,6 @@ int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type, hdr = vfio_get_region_info_cap(*info, VFIO_REGION_INFO_CAP_TYPE);
          if (!hdr) {
-            g_free(*info);
              continue;
          }
@@ -252,8 +260,6 @@ int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type,
          if (cap_type->type == type && cap_type->subtype == subtype) {
              return 0;
          }
-
-        g_free(*info);
      }
*info = NULL;
@@ -262,7 +268,7 @@ int vfio_device_get_region_info_type(VFIODevice *vbasedev, 
uint32_t type,
bool vfio_device_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
  {
-    g_autofree struct vfio_region_info *info = NULL;
+    struct vfio_region_info *info = NULL;
      bool ret = false;
if (!vfio_device_get_region_info(vbasedev, region, &info)) {
@@ -435,10 +441,21 @@ void vfio_device_prepare(VFIODevice *vbasedev, 
VFIOContainerBase *bcontainer,
      QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next);
QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next);
+
+    vbasedev->reginfo = g_new0(struct vfio_region_info *,
+                               vbasedev->num_regions);
  }
void vfio_device_unprepare(VFIODevice *vbasedev)
  {
+    int i;
+
+    for (i = 0; i < vbasedev->num_regions; i++) {
+        g_free(vbasedev->reginfo[i]);
+    }
+    g_free(vbasedev->reginfo);
+    vbasedev->reginfo = NULL;
+
      QLIST_REMOVE(vbasedev, container_next);
      QLIST_REMOVE(vbasedev, global_next);
      vbasedev->bcontainer = NULL;
diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
index 
3ee1a73b57c57563f661a246e6dd7e505231be91..e7952d15a014f28fe302cab312ab8e60a414c679
 100644
--- a/hw/vfio/igd.c
+++ b/hw/vfio/igd.c
@@ -349,8 +349,8 @@ static int vfio_pci_igd_lpc_init(VFIOPCIDevice *vdev,
static bool vfio_pci_igd_setup_lpc_bridge(VFIOPCIDevice *vdev, Error **errp)
  {
-    g_autofree struct vfio_region_info *host = NULL;
-    g_autofree struct vfio_region_info *lpc = NULL;
+    struct vfio_region_info *host = NULL;
+    struct vfio_region_info *lpc = NULL;
      PCIDevice *lpc_bridge;
      int ret;
@@ -510,7 +510,7 @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
  {
-    g_autofree struct vfio_region_info *opregion = NULL;
+    struct vfio_region_info *opregion = NULL;
      int ret, gen;
      uint64_t gms_size = 0;
      uint64_t *bdsm_size;
@@ -551,7 +551,7 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, 
Error **errp)
           * - OpRegion
           * - Same LPC bridge and Host bridge VID/DID/SVID/SSID as host
           */
-        g_autofree struct vfio_region_info *rom = NULL;
+        struct vfio_region_info *rom = NULL;
legacy_mode_enabled = true;
          info_report("IGD legacy mode enabled, "
@@ -681,7 +681,7 @@ error:
   */
  static bool vfio_pci_kvmgt_config_quirk(VFIOPCIDevice *vdev, Error **errp)
  {
-    g_autofree struct vfio_region_info *opregion = NULL;
+    struct vfio_region_info *opregion = NULL;
      int gen;
if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) ||
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 
da2ffc9bf36778fb61a7decf345391caee528b2a..9136cf52c82711939db01c09a8a1277327bc484f
 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -883,8 +883,8 @@ static void vfio_update_msi(VFIOPCIDevice *vdev)
static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
  {
-    g_autofree struct vfio_region_info *reg_info = NULL;
      VFIODevice *vbasedev = &vdev->vbasedev;
+    struct vfio_region_info *reg_info = NULL;
      uint64_t size;
      off_t off = 0;
      ssize_t bytes;
@@ -2710,7 +2710,7 @@ static VFIODeviceOps vfio_pci_ops = {
  bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
  {
      VFIODevice *vbasedev = &vdev->vbasedev;
-    g_autofree struct vfio_region_info *reg_info = NULL;
+    struct vfio_region_info *reg_info = NULL;
      int ret;
ret = vfio_device_get_region_info(vbasedev, VFIO_PCI_VGA_REGION_INDEX, &reg_info);
@@ -2775,7 +2775,7 @@ bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
  static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
  {
      VFIODevice *vbasedev = &vdev->vbasedev;
-    g_autofree struct vfio_region_info *reg_info = NULL;
+    struct vfio_region_info *reg_info = NULL;
      struct vfio_irq_info irq_info;
      int i, ret = -1;
diff --git a/hw/vfio/region.c b/hw/vfio/region.c
index 
04bf9eb0987c0ac460e0a1c3ba5abdf4a87f7499..ef2630cac3924fef200b2ece1be0dbad219dd67d
 100644
--- a/hw/vfio/region.c
+++ b/hw/vfio/region.c
@@ -182,7 +182,7 @@ static int vfio_setup_region_sparse_mmaps(VFIORegion 
*region,
  int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
                        int index, const char *name)
  {
-    g_autofree struct vfio_region_info *info = NULL;
+    struct vfio_region_info *info = NULL;
      int ret;
ret = vfio_device_get_region_info(vbasedev, index, &info);


Reply via email to