With this, we are able to create and use mediated devices,
which include Intel GVT-g (aka KVMGT) and Nvidia vGPUs, and probably more
types of devices in the future.

Signed-off-by: Dominik Csapak <[email protected]>
---
changes from v1:
* improved cleanup loop
* fixed error message for not available instances

 PVE/QemuServer.pm     | 54 ++++++++++++++++++++++++++++++++++++++++++++++-----
 PVE/QemuServer/PCI.pm | 52 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 101 insertions(+), 5 deletions(-)

diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index 0d169da..3a756df 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -1245,6 +1245,17 @@ EODESCR
        optional => 1,
        default => 0,
     },
+    'mdev' => {
+       type => 'string',
+        format_description => 'string',
+       pattern => '[^/\.:]+',
+       optional => 1,
+       description => <<EODESCR
+The type of mediated device to use.
+An instance of this type will be created on startup of the VM and
+will be cleaned up when the VM stops.
+EODESCR
+    }
 };
 PVE::JSONSchema::register_format('pve-qm-hostpci', $hostpci_fmt);
 
@@ -3540,6 +3551,15 @@ sub config_to_command {
        }
        my $pcidevices = $d->{pciid};
        my $multifunction = 1 if @$pcidevices > 1;
+       my $sysfspath;
+       if ($d->{mdev} && scalar(@$pcidevices) == 1) {
+           my $id = $pcidevices->[0]->{id};
+           my $function = $pcidevices->[0]->{function};
+           my $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $i);
+           $sysfspath = "$pcisysfs/devices/0000:$id.$function/$uuid";
+       } elsif ($d->{mdev}) {
+           warn "ignoring mediated device with multifunction device\n";
+       }
 
        my $j=0;
         foreach my $pcidevice (@$pcidevices) {
@@ -3548,7 +3568,13 @@ sub config_to_command {
            $id .= ".$j" if $multifunction;
            my $addr = $pciaddr;
            $addr .= ".$j" if $multifunction;
-           my $devicestr = 
"vfio-pci,host=$pcidevice->{id}.$pcidevice->{function},id=$id$addr";
+           my $devicestr = "vfio-pci";
+           if ($sysfspath) {
+               $devicestr .= ",sysfsdev=$sysfspath";
+           } else {
+               $devicestr .= ",host=$pcidevice->{id}.$pcidevice->{function}";
+           }
+           $devicestr .= ",id=$id$addr";
 
            if($j == 0){
                $devicestr .= "$rombar$xvga";
@@ -5143,10 +5169,16 @@ sub vm_start {
                my $info = PVE::QemuServer::PCI::pci_device_info("0000:$pciid");
                die "IOMMU not present\n" if 
!PVE::QemuServer::PCI::check_iommu_support();
                die "no pci device info for device '$pciid'\n" if !$info;
-               die "can't unbind/bind pci group to vfio '$pciid'\n"
-                   if 
!PVE::QemuServer::PCI::pci_dev_group_bind_to_vfio($pciid);
-               die "can't reset pci device '$pciid'\n"
-                   if $info->{has_fl_reset} and 
!PVE::QemuServer::PCI::pci_dev_reset($info);
+
+               if ($d->{mdev}) {
+                   my $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, 
$i);
+                   PVE::QemuServer::PCI::pci_create_mdev_device($pciid, $uuid, 
$d->{mdev});
+               } else {
+                   die "can't unbind/bind pci group to vfio '$pciid'\n"
+                       if 
!PVE::QemuServer::PCI::pci_dev_group_bind_to_vfio($pciid);
+                   die "can't reset pci device '$pciid'\n"
+                       if $info->{has_fl_reset} and 
!PVE::QemuServer::PCI::pci_dev_reset($info);
+               }
          }
         }
 
@@ -5386,6 +5418,18 @@ sub vm_stop_cleanup {
            unlink "/var/run/qemu-server/${vmid}.$ext";
        }
 
+       foreach my $key (keys %$conf) {
+           next if $key !~ m/^hostpci(\d+)$/;
+           my $hostpciindex = $1;
+           my $d = parse_hostpci($conf->{$key});
+           my $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, 
$hostpciindex);
+
+           foreach my $pci (@{$d->{pciid}}) {
+               my $pciid = $pci->{id} . "." . $pci->{function};
+               PVE::QemuServer::PCI::pci_cleanup_mdev_device($pciid, $uuid);
+           }
+       }
+
        vmconfig_apply_pending($vmid, $conf, $storecfg) if 
$apply_pending_changes;
     };
     warn $@ if $@; # avoid errors - just warn
diff --git a/PVE/QemuServer/PCI.pm b/PVE/QemuServer/PCI.pm
index 57d2f5c..19aebd7 100644
--- a/PVE/QemuServer/PCI.pm
+++ b/PVE/QemuServer/PCI.pm
@@ -303,4 +303,56 @@ sub pci_dev_group_bind_to_vfio {
     return 1;
 }
 
+# Create mdev instance $uuid of $type on PCI device $pciid via sysfs; reuses an
+# existing instance of that type (with a warning). Dies on failure, else undef.
+sub pci_create_mdev_device {
+    my ($pciid, $uuid, $type) = @_;
+
+    my $basedir = "$pcisysfs/devices/0000:$pciid";
+    my $mdev_dir = "$basedir/mdev_supported_types";
+
+    die "pci device '$pciid' does not support mediated devices \n"
+       if !-d $mdev_dir;
+    die "pci device '$pciid' has no type '$type'\n"
+       if !-d "$mdev_dir/$type";
+
+    if (-d "$basedir/$uuid") {
+       # it already exists, checking type (readlink returns undef on failure)
+       my $typelink = readlink("$basedir/$uuid/mdev_type");
+       my ($existingtype) = defined($typelink) ? $typelink =~ m|/([^/]+)$| : ();
+       die "mdev instance '$uuid' already exists, but type is not '$type'\n"
+           if !defined($existingtype) || $type ne $existingtype;
+
+       # instance exists, so use it but warn the user
+       warn "mdev instance '$uuid' already existed, using it.\n";
+       return undef;
+    }
+
+    # a missing or non-numeric counter counts as "no instances available"
+    my $instances = file_read_firstline("$mdev_dir/$type/available_instances");
+    my ($avail) = defined($instances) ? $instances =~ m/^(\d+)$/ : ();
+    die "pci device '$pciid' has no available instances of '$type'\n"
+       if !defined($avail) || $avail < 1;
+    die "could not create '$type' for pci devices '$pciid'\n"
+       if !file_write("$mdev_dir/$type/create", $uuid);
+    return undef;
+}
+
+# Remove mdev instance $uuid of PCI device $pciid (writes "1" to its sysfs 'remove').
+sub pci_cleanup_mdev_device {
+    my ($pciid, $uuid) = @_;
+    my $basedir = "$pcisysfs/devices/0000:$pciid/$uuid";
+    return 1 if !-e $basedir; # no such instance (e.g. plain passthrough): nothing to do
+    return file_write("$basedir/remove", "1");
+}
+
+# Derive the mdev UUID for a VM's hostpci slot: the index leads, the vmid trails.
+sub generate_mdev_uuid {
+    my ($vmid, $hostpciindex) = @_;
+
+    # zero-padded decimals: 8 digits of hostpci index, 12 digits of vmid
+    my @groups = (sprintf('%08d', $hostpciindex), ('0000') x 3, sprintf('%012d', $vmid));
+    return join('-', @groups);
+}
+
 1;
-- 
2.11.0


_______________________________________________
pve-devel mailing list
[email protected]
https://pve.proxmox.com/cgi-bin/mailman/listinfo/pve-devel

Reply via email to