On Thu, Dec 18, 2025 at 06:19:21PM -0800, Nathan Chen via Devel wrote:
> From: Nathan Chen <[email protected]>
>
> Open VFIO FDs from libvirt backend without exposing
> these FDs to XML users, i.e. one per iommufd hostdev
> for /dev/vfio/devices/vfioX, and pass the FD to qemu
> command line.
>
> Suggested-by: Ján Tomko <[email protected]>
> Signed-off-by: Nathan Chen <[email protected]>
> ---
> src/libvirt_private.syms | 1 +
> src/qemu/qemu_command.c | 21 +++++++++++
> src/qemu/qemu_process.c | 79 ++++++++++++++++++++++++++++++++++++++++
> src/util/virpci.c | 69 +++++++++++++++++++++++++++++++++++
> src/util/virpci.h | 2 +
> 5 files changed, 172 insertions(+)
>
> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
> index 4e57e4a8f6..ed2b0d381e 100644
> --- a/src/libvirt_private.syms
> +++ b/src/libvirt_private.syms
> @@ -3159,6 +3159,7 @@ virPCIDeviceGetStubDriverName;
> virPCIDeviceGetStubDriverType;
> virPCIDeviceGetUnbindFromStub;
> virPCIDeviceGetUsedBy;
> +virPCIDeviceGetVfioPath;
> virPCIDeviceGetVPD;
> virPCIDeviceHasPCIExpressLink;
> virPCIDeviceIsAssignable;
> diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
> index 98e4469c25..2a16f9df63 100644
> --- a/src/qemu/qemu_command.c
> +++ b/src/qemu/qemu_command.c
> @@ -4809,6 +4809,18 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def,
> NULL) < 0)
> return NULL;
>
> + if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO &&
> + pcisrc->driver.iommufd == VIR_TRISTATE_BOOL_YES) {
> + qemuDomainHostdevPrivate *hostdevPriv =
> QEMU_DOMAIN_HOSTDEV_PRIVATE(dev);
> +
> + if (hostdevPriv->vfioDeviceFd != -1) {
> + g_autofree char *fdstr = g_strdup_printf("%d",
> hostdevPriv->vfioDeviceFd);
> + if (virJSONValueObjectAdd(&props, "S:fd", fdstr, NULL) < 0)
> + return NULL;
> + hostdevPriv->vfioDeviceFd = -1;
> + }
> + }
> +
> if (qemuBuildDeviceAddressProps(props, def, dev->info) < 0)
> return NULL;
>
> @@ -5253,6 +5265,15 @@ qemuBuildHostdevCommandLine(virCommand *cmd,
> if (qemuCommandAddExtDevice(cmd, hostdev->info, def, qemuCaps) <
> 0)
> return -1;
>
> + if (subsys->u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) {
> + qemuDomainHostdevPrivate *hostdevPriv =
> QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev);
> +
> + if (hostdevPriv->vfioDeviceFd != -1) {
> + virCommandPassFD(cmd, hostdevPriv->vfioDeviceFd,
> + VIR_COMMAND_PASS_FD_CLOSE_PARENT);
> + }
> + }
> +
> if (!(devprops = qemuBuildPCIHostdevDevProps(def, hostdev)))
> return -1;
>
> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> index 0e50cd1ccc..ab88a6bf62 100644
> --- a/src/qemu/qemu_process.c
> +++ b/src/qemu/qemu_process.c
> @@ -103,6 +103,7 @@
> #include "storage_source.h"
> #include "backup_conf.h"
> #include "storage_file_probe.h"
> +#include "virpci.h"
>
> #include "logging/log_manager.h"
> #include "logging/log_protocol.h"
> @@ -8181,6 +8182,9 @@ qemuProcessLaunch(virConnectPtr conn,
> if (qemuExtDevicesStart(driver, vm, incomingMigrationExtDevices) < 0)
> goto cleanup;
>
> + if (qemuProcessOpenVfioFds(vm) < 0)
> + goto cleanup;
> +
> if (!(cmd = qemuBuildCommandLine(vm,
> incoming ? "defer" : NULL,
> vmop,
> @@ -10360,3 +10364,78 @@ qemuProcessHandleNbdkitExit(qemuNbdkitProcess
> *nbdkit,
> qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_NBDKIT_EXITED, 0, 0,
> nbdkit);
> virObjectUnlock(vm);
> }
> +
> +/**
> + * qemuProcessOpenVfioDeviceFd:
> + * @hostdev: host device definition
> + * @vfioFd: returned file descriptor
> + *
> + * Opens the VFIO device file descriptor for a hostdev.
> + *
> + * Returns: FD on success, -1 on failure
> + */
> +static int
> +qemuProcessOpenVfioDeviceFd(virDomainHostdevDef *hostdev)
> +{
> + g_autofree char *vfioPath = NULL;
> + int fd = -1;
> +
> + if (hostdev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS ||
> + hostdev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) {
> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("VFIO FD only supported for PCI hostdevs"));
> + return -1;
> + }
> +
> + if (virPCIDeviceGetVfioPath(&hostdev->source.subsys.u.pci.addr,
> &vfioPath) < 0)
> + return -1;
> +
> + VIR_DEBUG("Opening VFIO device %s", vfioPath);
> +
> + if ((fd = open(vfioPath, O_RDWR | O_CLOEXEC)) < 0) {
> + if (errno == ENOENT) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> + _("VFIO device %1$s not found - ensure device is
> bound to vfio-pci driver"),
> + vfioPath);
> + } else {
> + virReportSystemError(errno,
> + _("cannot open VFIO device %1$s"),
> vfioPath);
> + }
> + return -1;
> + }
> +
> + VIR_DEBUG("Opened VFIO device FD %d for %s", fd, vfioPath);
> + return fd;
> +}
> +
> +/**
> + * qemuProcessOpenVfioFds:
> + * @vm: domain object
> + *
> + * Opens all necessary VFIO file descriptors for the domain.
> + *
> + * Returns: 0 on success, -1 on failure
> + */
> +int
> +qemuProcessOpenVfioFds(virDomainObj *vm)
> +{
> + size_t i;
> +
> + /* Check if we have any hostdevs that need VFIO FDs */
> + for (i = 0; i < vm->def->nhostdevs; i++) {
> + virDomainHostdevDef *hostdev = vm->def->hostdevs[i];
> + qemuDomainHostdevPrivate *hostdevPriv =
> QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev);
> +
> + if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS &&
> + hostdev->source.subsys.type ==
> VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI &&
> + hostdev->source.subsys.u.pci.driver.name ==
> VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO &&
> + hostdev->source.subsys.u.pci.driver.iommufd ==
> VIR_TRISTATE_BOOL_YES) {
> + /* Open VFIO device FD */
> + hostdevPriv->vfioDeviceFd = qemuProcessOpenVfioDeviceFd(hostdev);
> + if (hostdevPriv->vfioDeviceFd == -1)
> + return -1;
> + }
> + }
> +
> + return 0;
> +}
> diff --git a/src/util/virpci.c b/src/util/virpci.c
> index 90617e69c6..da62ece0f6 100644
> --- a/src/util/virpci.c
> +++ b/src/util/virpci.c
> @@ -3320,3 +3320,72 @@ virPCIDeviceAddressFree(virPCIDeviceAddress *address)
> {
> g_free(address);
> }
> +
> +/**
> + * virPCIDeviceGetVfioPath:
> + * @addr: host device PCI address
> + * @vfioPath: returned VFIO device path
> + *
> + * Constructs the VFIO device path for a PCI hostdev.
> + *
> + * Returns: 0 on success, -1 on failure
> + */
> +int
> +virPCIDeviceGetVfioPath(virPCIDeviceAddress *addr,
> + char **vfioPath)
> +{
> + g_autofree char *addrStr = NULL;
> +
> + *vfioPath = NULL;
> + addrStr = virPCIDeviceAddressAsString(addr);
> +
> + /* First try: Direct lookup in device's vfio-dev subdirectory */
> + {
> + g_autofree char *sysfsPath = NULL;
> + g_autoptr(DIR) dir = NULL;
> + struct dirent *entry = NULL;
> +
> + sysfsPath = g_strdup_printf("/sys/bus/pci/devices/%s/vfio-dev/",
> addrStr);
> +
> + if (virDirOpen(&dir, sysfsPath) == 1) {
> + while (virDirRead(dir, &entry, sysfsPath) > 0) {
> + if (STRPREFIX(entry->d_name, "vfio")) {
> + *vfioPath = g_strdup_printf("/dev/vfio/devices/%s",
> entry->d_name);
> + return 0;
> + }
> + }
> + }
> + }
> +
> + /* Second try: Scan /sys/class/vfio-dev */
> + {
> + g_autofree char *sysfsPath = g_strdup("/sys/class/vfio-dev");
> + g_autoptr(DIR) dir = NULL;
> + struct dirent *entry = NULL;
> +
> + if (virDirOpen(&dir, sysfsPath) == 1) {
> + while (virDirRead(dir, &entry, sysfsPath) > 0) {
> + g_autofree char *devLink = NULL;
> + g_autofree char *target = NULL;
> +
> + if (!STRPREFIX(entry->d_name, "vfio"))
> + continue;
> +
> + devLink = g_strdup_printf("/sys/class/vfio-dev/%s/device",
> entry->d_name);
> +
> + if (virFileResolveLink(devLink, &target) < 0)
> + continue;
> +
> + if (strstr(target, addrStr)) {
> + *vfioPath = g_strdup_printf("/dev/vfio/devices/%s",
> entry->d_name);
> + return 0;
> + }
> + }
> + }
> + }
Why do we need to try two different approaches in this code? What is the
reason the first approach would fail but the second would succeed?
> +
> + virReportError(VIR_ERR_INTERNAL_ERROR,
> + _("cannot find VFIO device for PCI device %1$s"),
> + addrStr);
> + return -1;
> +}
With regards,
Daniel
--
|: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o- https://fstop138.berrange.com :|
|: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|