Hi Eric,

>-----Original Message-----
>From: Eric Auger <eric.au...@redhat.com>
>Sent: Monday, October 9, 2023 5:09 PM
>Subject: [PATCH v5 07/15] vfio/pci: Introduce vfio_[attach/detach]_device
>
>We want the VFIO devices to be able to use two different
>IOMMU backends, the legacy VFIO one and the new iommufd one.
>
>Introduce vfio_[attach/detach]_device which aim at hiding the
>underlying IOMMU backend (IOCTLs, datatypes, ...).
>
>Once vfio_attach_device completes, the device is attached
>to a security context and its fd can be used. Conversely
>When vfio_detach_device completes, the device has been
>detached from the security context.
>
>At the moment only the implementation based on the legacy
>container/group exists. Let's use it from the vfio-pci device.
>Subsequent patches will handle other devices.
>
>We also take benefit of this patch to properly free
>vbasedev->name on failure.
>
>Signed-off-by: Eric Auger <eric.au...@redhat.com>
>Signed-off-by: Yi Liu <yi.l....@intel.com>
>Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com>
>Reviewed-by: Cédric Le Goater <c...@redhat.com>
>
>---
>
>v4 -> v5:
>- remove vbasedev->name g_free as it is done on instance_finalize
>
>v2 -> v3:
>- added trace_vfio_detach_device
>- added a comment explaining why we pass @name to vfio_attach_device
>  although vbasedev->name is populated
>- free vbasedev->name and detach_device if needed
>---
> include/hw/vfio/vfio-common.h |  3 ++
> hw/vfio/common.c              | 74 +++++++++++++++++++++++++++++++++++
> hw/vfio/pci.c                 | 66 +++++++------------------------
> hw/vfio/trace-events          |  3 +-
> 4 files changed, 93 insertions(+), 53 deletions(-)
>
>diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>index c4e7c3b4a7..12fbfbc37d 100644
>--- a/include/hw/vfio/vfio-common.h
>+++ b/include/hw/vfio/vfio-common.h
>@@ -225,6 +225,9 @@ void vfio_put_group(VFIOGroup *group);
> struct vfio_device_info *vfio_get_device_info(int fd);
> int vfio_get_device(VFIOGroup *group, const char *name,
>                     VFIODevice *vbasedev, Error **errp);
>+int vfio_attach_device(char *name, VFIODevice *vbasedev,
>+                       AddressSpace *as, Error **errp);
>+void vfio_detach_device(VFIODevice *vbasedev);
>
> int vfio_kvm_device_add_fd(int fd, Error **errp);
> int vfio_kvm_device_del_fd(int fd, Error **errp);
>diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>index d8ed432cb6..f4c33c9858 100644
>--- a/hw/vfio/common.c
>+++ b/hw/vfio/common.c
>@@ -2611,3 +2611,77 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
>     }
>     return vfio_eeh_container_op(container, op);
> }
>+
>+static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp)
>+{
>+    char *tmp, group_path[PATH_MAX], *group_name;
>+    int ret, groupid;
>+    ssize_t len;
>+
>+    tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
>+    len = readlink(tmp, group_path, sizeof(group_path));
>+    g_free(tmp);
>+
>+    if (len <= 0 || len >= sizeof(group_path)) {
>+        ret = len < 0 ? -errno : -ENAMETOOLONG;
>+        error_setg_errno(errp, -ret, "no iommu_group found");
>+        return ret;
>+    }
>+
>+    group_path[len] = 0;
>+
>+    group_name = basename(group_path);
>+    if (sscanf(group_name, "%d", &groupid) != 1) {
>+        error_setg_errno(errp, errno, "failed to read %s", group_path);
>+        return -errno;
>+    }
>+    return groupid;
>+}
>+
>+/*
>+ * vfio_attach_device: attach a device to a security context
>+ * @name and @vbasedev->name are likely to be different depending
>+ * on the type of the device, hence the need for passing @name
>+ */
>+int vfio_attach_device(char *name, VFIODevice *vbasedev,
>+                       AddressSpace *as, Error **errp)
>+{
>+    int groupid = vfio_device_groupid(vbasedev, errp);
>+    VFIODevice *vbasedev_iter;
>+    VFIOGroup *group;
>+    int ret;
>+
>+    if (groupid < 0) {
>+        return groupid;
>+    }
>+
>+    trace_vfio_attach_device(vbasedev->name, groupid);
>+
>+    group = vfio_get_group(groupid, as, errp);
>+    if (!group) {
>+        return -ENOENT;
>+    }
>+
>+    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
>+        if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
>+            error_setg(errp, "device is already attached");
>+            vfio_put_group(group);
>+            return -EBUSY;
>+        }
>+    }
>+    ret = vfio_get_device(group, name, vbasedev, errp);
>+    if (ret) {
>+        vfio_put_group(group);
>+    }
>+
>+    return ret;
>+}
>+
>+void vfio_detach_device(VFIODevice *vbasedev)
>+{
>+    VFIOGroup *group = vbasedev->group;
>+
>+    trace_vfio_detach_device(vbasedev->name, group->groupid);
>+    vfio_put_base_device(vbasedev);
>+    vfio_put_group(group);
>+}
>diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>index 898296fd54..40ae46266e 100644
>--- a/hw/vfio/pci.c
>+++ b/hw/vfio/pci.c
>@@ -2895,10 +2895,10 @@ static void vfio_populate_device(VFIOPCIDevice
>*vdev, Error **errp)
>
> static void vfio_pci_put_device(VFIOPCIDevice *vdev)
> {
>+    vfio_detach_device(&vdev->vbasedev);
>+
>     g_free(vdev->vbasedev.name);
>     g_free(vdev->msix);
>-
>-    vfio_put_base_device(&vdev->vbasedev);
> }
>
> static void vfio_err_notifier_handler(void *opaque)
>@@ -3045,13 +3045,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
> {
>     VFIOPCIDevice *vdev = VFIO_PCI(pdev);
>     VFIODevice *vbasedev = &vdev->vbasedev;
>-    VFIODevice *vbasedev_iter;
>-    VFIOGroup *group;
>-    char *tmp, *subsys, group_path[PATH_MAX], *group_name;
>+    char *tmp, *subsys;
>     Error *err = NULL;
>-    ssize_t len;
>     struct stat st;
>-    int groupid;
>     int i, ret;
>     bool is_mdev;
>     char uuid[UUID_FMT_LEN];
>@@ -3082,39 +3078,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>     vbasedev->type = VFIO_DEVICE_TYPE_PCI;
>     vbasedev->dev = DEVICE(vdev);
>
>-    tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
>-    len = readlink(tmp, group_path, sizeof(group_path));
>-    g_free(tmp);
>-
>-    if (len <= 0 || len >= sizeof(group_path)) {
>-        error_setg_errno(errp, len < 0 ? errno : ENAMETOOLONG,
>-                         "no iommu_group found");
>-        goto error;
>-    }
>-
>-    group_path[len] = 0;
>-
>-    group_name = basename(group_path);
>-    if (sscanf(group_name, "%d", &groupid) != 1) {
>-        error_setg_errno(errp, errno, "failed to read %s", group_path);
>-        goto error;
>-    }
>-
>-    trace_vfio_realize(vbasedev->name, groupid);
>-
>-    group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev),
>errp);
>-    if (!group) {
>-        goto error;
>-    }
>-
>-    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
>-        if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
>-            error_setg(errp, "device is already attached");
>-            vfio_put_group(group);
>-            goto error;
>-        }
>-    }
>-
>     /*
>      * Mediated devices *might* operate compatibly with discarding of RAM, but
>      * we cannot know for certain, it depends on whether the mdev vendor 
> driver
>@@ -3132,7 +3095,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>     if (vbasedev->ram_block_discard_allowed && !is_mdev) {
>         error_setg(errp, "x-balloon-allowed only potentially compatible "
>                    "with mdev devices");
>-        vfio_put_group(group);
>         goto error;
>     }
>
>@@ -3143,17 +3105,17 @@ static void vfio_realize(PCIDevice *pdev, Error
>**errp)
>         name = g_strdup(vbasedev->name);
>     }
>
>-    ret = vfio_get_device(group, name, vbasedev, errp);
>+    ret = vfio_attach_device(name, vbasedev,
>+                             pci_device_iommu_address_space(pdev), errp);
>     g_free(name);
>     if (ret) {
>-        vfio_put_group(group);
>         goto error;
>     }
>
>     vfio_populate_device(vdev, &err);
>     if (err) {
>         error_propagate(errp, err);
>-        goto error;
>+        goto out_detach;
>     }
>
>     /* Get a copy of config space */
>@@ -3163,7 +3125,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>     if (ret < (int)MIN(pci_config_size(&vdev->pdev), vdev->config_size)) {
>         ret = ret < 0 ? -errno : -EFAULT;
>         error_setg_errno(errp, -ret, "failed to read device config space");
>-        goto error;
>+        goto out_detach;
>     }
>
>     /* vfio emulates a lot for us, but some bits need extra love */
>@@ -3182,7 +3144,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>     if (vdev->vendor_id != PCI_ANY_ID) {
>         if (vdev->vendor_id >= 0xffff) {
>             error_setg(errp, "invalid PCI vendor ID provided");
>-            goto error;
>+            goto out_detach;
>         }
>         vfio_add_emulated_word(vdev, PCI_VENDOR_ID, vdev->vendor_id, ~0);
>         trace_vfio_pci_emulated_vendor_id(vbasedev->name, vdev->vendor_id);
>@@ -3193,7 +3155,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>     if (vdev->device_id != PCI_ANY_ID) {
>         if (vdev->device_id > 0xffff) {
>             error_setg(errp, "invalid PCI device ID provided");
>-            goto error;
>+            goto out_detach;
>         }
>         vfio_add_emulated_word(vdev, PCI_DEVICE_ID, vdev->device_id, ~0);
>         trace_vfio_pci_emulated_device_id(vbasedev->name, vdev->device_id);
>@@ -3204,7 +3166,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>     if (vdev->sub_vendor_id != PCI_ANY_ID) {
>         if (vdev->sub_vendor_id > 0xffff) {
>             error_setg(errp, "invalid PCI subsystem vendor ID provided");
>-            goto error;
>+            goto out_detach;
>         }
>         vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_VENDOR_ID,
>                                vdev->sub_vendor_id, ~0);
>@@ -3215,7 +3177,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>     if (vdev->sub_device_id != PCI_ANY_ID) {
>         if (vdev->sub_device_id > 0xffff) {
>             error_setg(errp, "invalid PCI subsystem device ID provided");
>-            goto error;
>+            goto out_detach;
>         }
>         vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_ID, vdev->sub_device_id,
>~0);
>         trace_vfio_pci_emulated_sub_device_id(vbasedev->name,
>@@ -3248,7 +3210,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>     vfio_msix_early_setup(vdev, &err);
>     if (err) {
>         error_propagate(errp, err);
>-        goto error;
>+        goto out_detach;
>     }
>
>     vfio_bars_register(vdev);
>@@ -3364,6 +3326,8 @@ out_deregister:
> out_teardown:
>     vfio_teardown_msi(vdev);
>     vfio_bars_exit(vdev);
>+out_detach:
>+    vfio_detach_device(vbasedev);

I just reproduced a regression here with hot-unplug:

(qemu) device_add vfio-pci,host=81:11.0,id=vfio0,bus=root0
vfio: error disconnecting group 447 from container
Error: vfio 0000:81:11.0: does not support requested IGD OpRegion feature

Reason is there are some resources only released in finalize(),
so UNSET_CONTAINER call fails here.
IMHO, we can drop this vfio_detach_device call and let the same call in
finalize() do the real work as original.

After fix:
(qemu) device_add vfio-pci,host=81:11.0,id=vfio0,bus=root0
Error: vfio 0000:81:11.0: does not support requested IGD OpRegion feature

Thanks
Zhenzhong
> error:
>     error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name);
> }
>@@ -3371,7 +3335,6 @@ error:
> static void vfio_instance_finalize(Object *obj)
> {
>     VFIOPCIDevice *vdev = VFIO_PCI(obj);
>-    VFIOGroup *group = vdev->vbasedev.group;
>
>     vfio_display_finalize(vdev);
>     vfio_bars_finalize(vdev);
>@@ -3385,7 +3348,6 @@ static void vfio_instance_finalize(Object *obj)
>      * g_free(vdev->igd_opregion);
>      */
>     vfio_pci_put_device(vdev);
>-    vfio_put_group(group);
> }
>
> static void vfio_exitfn(PCIDevice *pdev)
>diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
>index 0ba3c5a0e2..8ac13eb106 100644
>--- a/hw/vfio/trace-events
>+++ b/hw/vfio/trace-events
>@@ -37,7 +37,8 @@ vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot,
>int function, int
> vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot
>reset: %s"
> vfio_populate_device_config(const char *name, unsigned long size, unsigned
>long offset, unsigned long flags) "Device %s config:\n  size: 0x%lx, offset: 
>0x%lx,
>flags: 0x%lx"
> vfio_populate_device_get_irq_info_failure(const char *errstr)
>"VFIO_DEVICE_GET_IRQ_INFO failure: %s"
>-vfio_realize(const char *name, int group_id) " (%s) group %d"
>+vfio_attach_device(const char *name, int group_id) " (%s) group %d"
>+vfio_detach_device(const char *name, int group_id) " (%s) group %d"
> vfio_mdev(const char *name, bool is_mdev) " (%s) is_mdev %d"
> vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s
>0x%x@0x%x"
> vfio_pci_reset(const char *name) " (%s)"
>--
>2.41.0

Reply via email to