With this, we are able to create and use mediated devices, which include Intel GVT-g (aka KVMGT) and Nvidia vGPUs, and probably more types of devices in the future.
Signed-off-by: Dominik Csapak <[email protected]> --- changes from v1: * improved cleanup loop * fixed error message for not available instances PVE/QemuServer.pm | 54 ++++++++++++++++++++++++++++++++++++++++++++++----- PVE/QemuServer/PCI.pm | 52 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+), 5 deletions(-) diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm index 0d169da..3a756df 100644 --- a/PVE/QemuServer.pm +++ b/PVE/QemuServer.pm @@ -1245,6 +1245,17 @@ EODESCR optional => 1, default => 0, }, + 'mdev' => { + type => 'string', + format_description => 'string', + pattern => '[^/\.:]+', + optional => 1, + description => <<EODESCR +The type of mediated device to use. +An instance of this type will be created on startup of the VM and +will be cleaned up when the VM stops. +EODESCR + } }; PVE::JSONSchema::register_format('pve-qm-hostpci', $hostpci_fmt); @@ -3540,6 +3551,15 @@ sub config_to_command { } my $pcidevices = $d->{pciid}; my $multifunction = 1 if @$pcidevices > 1; + my $sysfspath; + if ($d->{mdev} && scalar(@$pcidevices) == 1) { + my $id = $pcidevices->[0]->{id}; + my $function = $pcidevices->[0]->{function}; + my $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $i); + $sysfspath = "$pcisysfs/devices/0000:$id.$function/$uuid"; + } elsif ($d->{mdev}) { + warn "ignoring mediated device with multifunction device\n"; + } my $j=0; foreach my $pcidevice (@$pcidevices) { @@ -3548,7 +3568,13 @@ sub config_to_command { $id .= ".$j" if $multifunction; my $addr = $pciaddr; $addr .= ".$j" if $multifunction; - my $devicestr = "vfio-pci,host=$pcidevice->{id}.$pcidevice->{function},id=$id$addr"; + my $devicestr = "vfio-pci"; + if ($sysfspath) { + $devicestr .= ",sysfsdev=$sysfspath"; + } else { + $devicestr .= ",host=$pcidevice->{id}.$pcidevice->{function}"; + } + $devicestr .= ",id=$id$addr"; if($j == 0){ $devicestr .= "$rombar$xvga"; @@ -5143,10 +5169,16 @@ sub vm_start { my $info = 
PVE::QemuServer::PCI::pci_device_info("0000:$pciid"); die "IOMMU not present\n" if !PVE::QemuServer::PCI::check_iommu_support(); die "no pci device info for device '$pciid'\n" if !$info; - die "can't unbind/bind pci group to vfio '$pciid'\n" - if !PVE::QemuServer::PCI::pci_dev_group_bind_to_vfio($pciid); - die "can't reset pci device '$pciid'\n" - if $info->{has_fl_reset} and !PVE::QemuServer::PCI::pci_dev_reset($info); + + if ($d->{mdev}) { + my $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $i); + PVE::QemuServer::PCI::pci_create_mdev_device($pciid, $uuid, $d->{mdev}); + } else { + die "can't unbind/bind pci group to vfio '$pciid'\n" + if !PVE::QemuServer::PCI::pci_dev_group_bind_to_vfio($pciid); + die "can't reset pci device '$pciid'\n" + if $info->{has_fl_reset} and !PVE::QemuServer::PCI::pci_dev_reset($info); + } } } @@ -5386,6 +5418,18 @@ sub vm_stop_cleanup { unlink "/var/run/qemu-server/${vmid}.$ext"; } + foreach my $key (keys %$conf) { + next if $key !~ m/^hostpci(\d+)$/; + my $hostpciindex = $1; + my $d = parse_hostpci($conf->{$key}); + my $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $hostpciindex); + + foreach my $pci (@{$d->{pciid}}) { + my $pciid = $pci->{id} . "." . 
$pci->{function}; + PVE::QemuServer::PCI::pci_cleanup_mdev_device($pciid, $uuid); + } + } + vmconfig_apply_pending($vmid, $conf, $storecfg) if $apply_pending_changes; }; warn $@ if $@; # avoid errors - just warn diff --git a/PVE/QemuServer/PCI.pm b/PVE/QemuServer/PCI.pm index 57d2f5c..19aebd7 100644 --- a/PVE/QemuServer/PCI.pm +++ b/PVE/QemuServer/PCI.pm @@ -303,4 +303,56 @@ sub pci_dev_group_bind_to_vfio { return 1; } +sub pci_create_mdev_device { + my ($pciid, $uuid, $type) = @_; + + my $basedir = "$pcisysfs/devices/0000:$pciid"; + my $mdev_dir = "$basedir/mdev_supported_types"; + + die "pci device '$pciid' does not support mediated devices \n" + if !-d $mdev_dir; + + die "pci device '$pciid' has no type '$type'\n" + if !-d "$mdev_dir/$type"; + + if (-d "$basedir/$uuid") { + # it already exists, checking type + my $typelink = readlink("$basedir/$uuid/mdev_type"); + my ($existingtype) = $typelink =~ m|/([^/]+)$|; + die "mdev instance '$uuid' already exits, but type is not '$type'\n" + if $type ne $existingtype; + + # instance exists, so use it but warn the user + warn "mdev instance '$uuid' already existed, using it.\n"; + return undef; + } + + my $instances = file_read_firstline("$mdev_dir/$type/available_instances"); + my ($avail) = $instances =~ m/^(\d+)$/; + die "pci device '$pciid' has no available instances of '$type'\n" + if $avail < 1; + + die "could not create 'type' for pci devices '$pciid'\n" + if !file_write("$mdev_dir/$type/create", $uuid); + + return undef; +} + +sub pci_cleanup_mdev_device { + my ($pciid, $uuid) = @_; + + my $basedir = "$pcisysfs/devices/0000:$pciid/$uuid"; + + return file_write("$basedir/remove", "1"); +} + +# encode the hostpci index and vmid into the uuid +sub generate_mdev_uuid { + my ($vmid, $hostpciindex) = @_; + + my $string = sprintf("%08d-0000-0000-0000-%012d", $hostpciindex, $vmid); + + return $string; +} + 1; -- 2.11.0 _______________________________________________ pve-devel mailing list [email protected] 
https://pve.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
