From: Nathan Chen <[email protected]> Open the VFIO device FDs in the libvirt backend without exposing them to XML users, i.e. open one /dev/vfio/devices/vfioX FD per iommufd hostdev, and pass each FD to QEMU on the command line.
Suggested-by: Ján Tomko <[email protected]> Signed-off-by: Nathan Chen <[email protected]> --- src/libvirt_private.syms | 1 + src/qemu/qemu_command.c | 21 +++++++++++ src/qemu/qemu_process.c | 79 ++++++++++++++++++++++++++++++++++++++++ src/util/virpci.c | 69 +++++++++++++++++++++++++++++++++++ src/util/virpci.h | 2 + 5 files changed, 172 insertions(+) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 4e57e4a8f6..ed2b0d381e 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -3159,6 +3159,7 @@ virPCIDeviceGetStubDriverName; virPCIDeviceGetStubDriverType; virPCIDeviceGetUnbindFromStub; virPCIDeviceGetUsedBy; +virPCIDeviceGetVfioPath; virPCIDeviceGetVPD; virPCIDeviceHasPCIExpressLink; virPCIDeviceIsAssignable; diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 98e4469c25..2a16f9df63 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -4809,6 +4809,18 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def, NULL) < 0) return NULL; + if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO && + pcisrc->driver.iommufd == VIR_TRISTATE_BOOL_YES) { + qemuDomainHostdevPrivate *hostdevPriv = QEMU_DOMAIN_HOSTDEV_PRIVATE(dev); + + if (hostdevPriv->vfioDeviceFd != -1) { + g_autofree char *fdstr = g_strdup_printf("%d", hostdevPriv->vfioDeviceFd); + if (virJSONValueObjectAdd(&props, "S:fd", fdstr, NULL) < 0) + return NULL; + hostdevPriv->vfioDeviceFd = -1; + } + } + if (qemuBuildDeviceAddressProps(props, def, dev->info) < 0) return NULL; @@ -5253,6 +5265,15 @@ qemuBuildHostdevCommandLine(virCommand *cmd, if (qemuCommandAddExtDevice(cmd, hostdev->info, def, qemuCaps) < 0) return -1; + if (subsys->u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) { + qemuDomainHostdevPrivate *hostdevPriv = QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev); + + if (hostdevPriv->vfioDeviceFd != -1) { + virCommandPassFD(cmd, hostdevPriv->vfioDeviceFd, + VIR_COMMAND_PASS_FD_CLOSE_PARENT); + } + } + if (!(devprops = 
qemuBuildPCIHostdevDevProps(def, hostdev))) return -1; diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 0e50cd1ccc..ab88a6bf62 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -103,6 +103,7 @@ #include "storage_source.h" #include "backup_conf.h" #include "storage_file_probe.h" +#include "virpci.h" #include "logging/log_manager.h" #include "logging/log_protocol.h" @@ -8181,6 +8182,9 @@ qemuProcessLaunch(virConnectPtr conn, if (qemuExtDevicesStart(driver, vm, incomingMigrationExtDevices) < 0) goto cleanup; + if (qemuProcessOpenVfioFds(vm) < 0) + goto cleanup; + if (!(cmd = qemuBuildCommandLine(vm, incoming ? "defer" : NULL, vmop, @@ -10360,3 +10364,78 @@ qemuProcessHandleNbdkitExit(qemuNbdkitProcess *nbdkit, qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_NBDKIT_EXITED, 0, 0, nbdkit); virObjectUnlock(vm); } + +/** + * qemuProcessOpenVfioDeviceFd: + * @hostdev: host device definition + * + * Opens the VFIO device file descriptor for a hostdev; + * the caller owns the returned descriptor. 
+ * + * Returns: FD on success, -1 on failure + */ +static int +qemuProcessOpenVfioDeviceFd(virDomainHostdevDef *hostdev) +{ + g_autofree char *vfioPath = NULL; + int fd = -1; + + if (hostdev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS || + hostdev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("VFIO FD only supported for PCI hostdevs")); + return -1; + } + + if (virPCIDeviceGetVfioPath(&hostdev->source.subsys.u.pci.addr, &vfioPath) < 0) + return -1; + + VIR_DEBUG("Opening VFIO device %s", vfioPath); + + if ((fd = open(vfioPath, O_RDWR | O_CLOEXEC)) < 0) { + if (errno == ENOENT) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("VFIO device %1$s not found - ensure device is bound to vfio-pci driver"), + vfioPath); + } else { + virReportSystemError(errno, + _("cannot open VFIO device %1$s"), vfioPath); + } + return -1; + } + + VIR_DEBUG("Opened VFIO device FD %d for %s", fd, vfioPath); + return fd; +} + +/** + * qemuProcessOpenVfioFds: + * @vm: domain object + * + * Opens all necessary VFIO file descriptors for the domain. 
+ * + * Returns: 0 on success, -1 on failure + */ +int +qemuProcessOpenVfioFds(virDomainObj *vm) +{ + size_t i; + + /* Check if we have any hostdevs that need VFIO FDs */ + for (i = 0; i < vm->def->nhostdevs; i++) { + virDomainHostdevDef *hostdev = vm->def->hostdevs[i]; + qemuDomainHostdevPrivate *hostdevPriv = QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev); + + if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && + hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI && + hostdev->source.subsys.u.pci.driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO && + hostdev->source.subsys.u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) { + /* Open VFIO device FD */ + hostdevPriv->vfioDeviceFd = qemuProcessOpenVfioDeviceFd(hostdev); + if (hostdevPriv->vfioDeviceFd == -1) + return -1; + } + } + + return 0; +} diff --git a/src/util/virpci.c b/src/util/virpci.c index 90617e69c6..da62ece0f6 100644 --- a/src/util/virpci.c +++ b/src/util/virpci.c @@ -3320,3 +3320,72 @@ virPCIDeviceAddressFree(virPCIDeviceAddress *address) { g_free(address); } + +/** + * virPCIDeviceGetVfioPath: + * @addr: host device PCI address + * @vfioPath: returned VFIO device path + * + * Constructs the VFIO device path for a PCI hostdev. 
+ * + * Returns: 0 on success, -1 on failure + */ +int +virPCIDeviceGetVfioPath(virPCIDeviceAddress *addr, + char **vfioPath) +{ + g_autofree char *addrStr = NULL; + + *vfioPath = NULL; + addrStr = virPCIDeviceAddressAsString(addr); + + /* First try: Direct lookup in device's vfio-dev subdirectory */ + { + g_autofree char *sysfsPath = NULL; + g_autoptr(DIR) dir = NULL; + struct dirent *entry = NULL; + + sysfsPath = g_strdup_printf("/sys/bus/pci/devices/%s/vfio-dev/", addrStr); + + if (virDirOpen(&dir, sysfsPath) == 1) { + while (virDirRead(dir, &entry, sysfsPath) > 0) { + if (STRPREFIX(entry->d_name, "vfio")) { + *vfioPath = g_strdup_printf("/dev/vfio/devices/%s", entry->d_name); + return 0; + } + } + } + } + + /* Second try: Scan /sys/class/vfio-dev */ + { + g_autofree char *sysfsPath = g_strdup("/sys/class/vfio-dev"); + g_autoptr(DIR) dir = NULL; + struct dirent *entry = NULL; + + if (virDirOpen(&dir, sysfsPath) == 1) { + while (virDirRead(dir, &entry, sysfsPath) > 0) { + g_autofree char *devLink = NULL; + g_autofree char *target = NULL; + + if (!STRPREFIX(entry->d_name, "vfio")) + continue; + + devLink = g_strdup_printf("/sys/class/vfio-dev/%s/device", entry->d_name); + + if (virFileResolveLink(devLink, &target) < 0) + continue; + + if (strstr(target, addrStr)) { + *vfioPath = g_strdup_printf("/dev/vfio/devices/%s", entry->d_name); + return 0; + } + } + } + } + + virReportError(VIR_ERR_INTERNAL_ERROR, + _("cannot find VFIO device for PCI device %1$s"), + addrStr); + return -1; +} diff --git a/src/util/virpci.h b/src/util/virpci.h index fc538566e1..24ede10755 100644 --- a/src/util/virpci.h +++ b/src/util/virpci.h @@ -296,6 +296,8 @@ void virPCIEDeviceInfoFree(virPCIEDeviceInfo *dev); void virPCIDeviceAddressFree(virPCIDeviceAddress *address); +int virPCIDeviceGetVfioPath(virPCIDeviceAddress *addr, char **vfioPath); + G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIDevice, virPCIDeviceFree); G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIDeviceAddress, virPCIDeviceAddressFree); 
G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIEDeviceInfo, virPCIEDeviceInfoFree); -- 2.43.0
