add support for sharing directories with a guest vm

virtio-fs needs virtiofsd to be started.
In order to start virtiofsd as a process (despite being a daemon it does not
run in the background), a double-fork is used.

virtiofsd should close itself together with qemu.

The only required parameter is dirid;
the parameters direct-io & cache are optional.
Additionally, the xattr & acl parameters override the
directory mapping settings for xattr & acl.

The dirid gets mapped to the path on the current node
and is also used as a mount-tag (name used to mount the
device on the guest).

example config:
```
virtiofs0: foo,direct-io=1,cache=always,acl=1
virtiofs1: dirid=bar,cache=never,xattr=1
```

For information on the optional parameters see:
https://gitlab.com/virtio-fs/virtiofsd/-/blob/main/README.md

Signed-off-by: Markus Frank <m.fr...@proxmox.com>
---
 PVE/QemuServer.pm        | 185 +++++++++++++++++++++++++++++++++++++++
 PVE/QemuServer/Memory.pm |  25 ++++--
 debian/control           |   1 +
 3 files changed, 205 insertions(+), 6 deletions(-)

diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index 2895675..92580df 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -43,6 +43,7 @@ use PVE::PBSClient;
 use PVE::RESTEnvironment qw(log_warn);
 use PVE::RPCEnvironment;
 use PVE::Storage;
+use PVE::Mapping::Dir;
 use PVE::SysFSTools;
 use PVE::Systemd;
 use PVE::Tools qw(run_command file_read_firstline file_get_contents 
dir_glob_foreach get_host_arch $IPV6RE);
@@ -277,6 +278,42 @@ my $rng_fmt = {
     },
 };
 
+# Property-string schema for a single 'virtiofsN' VM config entry.
+# 'dirid' is the default key, so it may be given without the 'dirid=' prefix;
+# all other keys are optional. The schema is registered below as the
+# 'pve-qm-virtiofs' format.
+my $virtiofs_fmt = {
+    'dirid' => {
+       type => 'string',
+       default_key => 1,
+       description => "Mapping identifier of the directory mapping to be"
+           ." shared with the guest. Also used as a mount tag inside the VM.",
+       format_description => 'mapping-id',
+       format => 'pve-configid',
+    },
+    'cache' => {
+       type => 'string',
+       description => "The caching policy the file system should use"
+           ." (auto, always, never).",
+       format_description => "virtiofs-cache",
+       enum => [qw(auto always never)],
+       optional => 1,
+    },
+    'direct-io' => {
+       type => 'boolean',
+       description => "Honor the O_DIRECT flag passed down by guest 
applications",
+       format_description => "virtiofs-directio",
+       optional => 1,
+    },
+    # xattr and acl are optional per-VM settings; when unset, the consumers of
+    # this format fall back to the values of the directory mapping.
+    xattr => {
+       type => 'boolean',
+       description => "Enable support for extended attributes.",
+       optional => 1,
+    },
+    acl => {
+       type => 'boolean',
+       description => "Enable support for posix ACLs (implies --xattr).",
+       optional => 1,
+    },
+};
+PVE::JSONSchema::register_format('pve-qm-virtiofs', $virtiofs_fmt);
+
 my $meta_info_fmt = {
     'ctime' => {
        type => 'integer',
@@ -839,6 +876,7 @@ while (my ($k, $v) = each %$confdesc) {
 }
 
 my $MAX_NETS = 32;
+my $MAX_VIRTIOFS = 10;
 my $MAX_SERIAL_PORTS = 4;
 my $MAX_PARALLEL_PORTS = 3;
 
@@ -948,6 +986,21 @@ my $netdesc = {
 
 PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
 
+# Config option description shared by all 'virtiofsN' keys; the value is a
+# property string following $virtiofs_fmt.
+my $virtiofsdesc = {
+    optional => 1,
+    type => 'string', format => $virtiofs_fmt,
+    description => "share files between host and guest",
+};
+PVE::JSONSchema::register_standard_option("pve-qm-virtiofs", $virtiofsdesc);
+
+# Returns the maximum number of virtiofs entries a VM config may contain, so
+# that code outside this file can iterate over all 'virtiofsN' keys.
+sub max_virtiofs {
+    return $MAX_VIRTIOFS;
+}
+
+# Register virtiofs0 .. virtiofs($MAX_VIRTIOFS - 1) as valid config keys.
+for (my $i = 0; $i < $MAX_VIRTIOFS; $i++)  {
+    $confdesc->{"virtiofs$i"} = $virtiofsdesc;
+}
+
 my $ipconfig_fmt = {
     ip => {
        type => 'string',
@@ -4055,6 +4108,23 @@ sub config_to_command {
        push @$devices, '-device', $netdevicefull;
     }
 
+    my $virtiofs_enabled = 0;
+    for (my $i = 0; $i < $MAX_VIRTIOFS; $i++) {
+       my $opt = "virtiofs$i";
+
+       next if !$conf->{$opt};
+       my $virtiofs = parse_property_string('pve-qm-virtiofs', $conf->{$opt});
+       next if !$virtiofs;
+
+       check_virtiofs_config ($conf, $virtiofs);
+
+       push @$devices, '-chardev', 
"socket,id=virtfs$i,path=/var/run/virtiofsd/vm$vmid-fs$i";
+       push @$devices, '-device', 'vhost-user-fs-pci,queue-size=1024'
+           .",chardev=virtfs$i,tag=$virtiofs->{dirid}";
+
+       $virtiofs_enabled = 1;
+    }
+
     if ($conf->{ivshmem}) {
        my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
 
@@ -4114,6 +4184,14 @@ sub config_to_command {
     }
     push @$machineFlags, "type=${machine_type_min}";
 
+    if ($virtiofs_enabled && !$conf->{numa}) {
+       # kvm: '-machine memory-backend' and '-numa memdev' properties are
+       # mutually exclusive
+       push @$devices, '-object', 'memory-backend-memfd,id=virtiofs-mem'
+           .",size=$conf->{memory}M,share=on";
+       push @$machineFlags, 'memory-backend=virtiofs-mem';
+    }
+
     push @$cmd, @$devices;
     push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
     push @$cmd, '-machine', join(',', @$machineFlags) if 
scalar(@$machineFlags);
@@ -4140,6 +4218,96 @@ sub config_to_command {
     return wantarray ? ($cmd, $vollist, $spice_port, $pci_devices) : $cmd;
 }
 
+# Sanity-check one parsed virtiofs entry against the directory mappings.
+#
+# $conf     - the VM config (only 'ostype' is read here)
+# $virtiofs - the parsed 'virtiofsN' property string
+#
+# Warns when ACLs are enabled for a Windows guest. Dies unless exactly one
+# mapping for the current node exists and that mapping passes validation.
+sub check_virtiofs_config {
+    my ($conf, $virtiofs) = @_;
+
+    my $dirid = $virtiofs->{dirid};
+    my $mapping = PVE::Mapping::Dir::config()->{ids}->{$dirid};
+    my $local_entries = PVE::Mapping::Dir::find_on_current_node($dirid);
+
+    # an explicit per-VM acl setting takes precedence over the mapping's one
+    my $use_acl = $virtiofs->{acl} // $mapping->{acl};
+    log_warn(
+       "Please disable ACLs for virtiofs on Windows VMs, otherwise"
+       ." the virtiofs shared directory cannot be mounted.\n"
+    ) if $use_acl && windows_version($conf->{ostype});
+
+    die "virtiofs needs exactly one mapping for this node\n"
+       if !$local_entries || scalar(@$local_entries) != 1;
+
+    eval { PVE::Mapping::Dir::assert_valid($local_entries->[0]) };
+    die "Directory Mapping invalid: $@\n" if $@;
+}
+
+# Start a virtiofsd instance for the 'virtiofs$fsid' entry of VM $vmid.
+#
+# $vmid     - ID of the VM, used for the socket path and the process name
+# $fsid     - index N of the 'virtiofsN' config key
+# $virtiofs - the parsed 'virtiofsN' property string
+#
+# Creates the listening unix socket below /var/run/virtiofsd, then launches
+# virtiofsd with that socket via a double fork so that it is detached from
+# the calling process.
+#
+# Returns the listening socket. The caller has to keep it alive so that QEMU
+# waits for virtiofsd to start.
+#
+# Dies if no directory mapping with a path exists for this node, if the socket
+# cannot be set up, or if forking the helper processes fails.
+sub start_virtiofs {
+    my ($vmid, $fsid, $virtiofs) = @_;
+
+    my $dir_cfg = PVE::Mapping::Dir::config()->{ids}->{$virtiofs->{dirid}};
+    my $node_list = PVE::Mapping::Dir::find_on_current_node($virtiofs->{dirid});
+
+    # Default to dir config xattr & acl settings
+    my $xattr = $virtiofs->{xattr} // $dir_cfg->{xattr};
+    my $acl = $virtiofs->{'acl'} // $dir_cfg->{'acl'};
+
+    # do not start virtiofsd with an undefined shared directory
+    my $node_cfg = $node_list ? $node_list->[0] : undef;
+    die "virtiofs: no directory mapping '$virtiofs->{dirid}' for this node\n"
+       if !$node_cfg || !defined($node_cfg->{path});
+
+    my $path = $node_cfg->{path};
+    my $socket_path_root = "/var/run/virtiofsd";
+    # best effort, the directory may already exist; creating the socket below
+    # fails if the directory is not usable
+    mkdir $socket_path_root;
+    my $socket_path = "$socket_path_root/vm$vmid-fs$fsid";
+    unlink($socket_path);
+    my $socket = IO::Socket::UNIX->new(
+       Type => SOCK_STREAM,
+       Local => $socket_path,
+       Listen => 1,
+    ) or die "cannot create socket - $!\n";
+
+    # the socket is handed to virtiofsd via --fd, so it must survive exec()
+    my $flags = fcntl($socket, F_GETFD, 0)
+       or die "failed to get file descriptor flags: $!\n";
+    fcntl($socket, F_SETFD, $flags & ~FD_CLOEXEC)
+       or die "failed to remove FD_CLOEXEC from file descriptor\n";
+
+    my $fd = $socket->fileno();
+
+    my $virtiofsd_bin = '/usr/libexec/virtiofsd';
+
+    # fork() returns undef on failure. This has to be checked before comparing
+    # with 0, because undef == 0 is true and the parent would otherwise run the
+    # child branch.
+    my $pid = fork();
+    die "could not fork to start virtiofsd\n" if !defined($pid);
+
+    if ($pid == 0) {
+       # Intermediate child: it must never return or die from here, otherwise
+       # a second copy of the caller's code would keep running in this process.
+       POSIX::setsid();
+       $0 = "task pve-vm$vmid-virtiofs$fsid";
+       for my $fd_loop (3 .. POSIX::sysconf(POSIX::_SC_OPEN_MAX())) {
+           POSIX::close($fd_loop) if ($fd_loop != $fd);
+       }
+
+       my $pid2 = fork();
+       if (!defined($pid2)) {
+           # reported to the parent through the exit status
+           POSIX::_exit(1);
+       } elsif ($pid2 == 0) {
+           my $cmd = [$virtiofsd_bin, "--fd=$fd", "--shared-dir=$path"];
+           push @$cmd, '--xattr' if $xattr;
+           push @$cmd, '--posix-acl' if $acl;
+           push @$cmd, '--announce-submounts' if ($node_cfg->{submounts});
+           push @$cmd, '--allow-direct-io' if ($virtiofs->{'direct-io'});
+           push @$cmd, "--cache=$virtiofs->{'cache'}"
+               if ($virtiofs->{'cache'});
+           push @$cmd, '--syslog';
+           # exec only returns on failure
+           exec(@$cmd) or warn "could not execute $virtiofsd_bin: $!\n";
+           POSIX::_exit(1);
+       }
+       POSIX::_exit(0);
+    }
+
+    # Reap the intermediate child; a non-zero status means that the second
+    # fork failed. Only trust $? if the child was really reaped by us.
+    my $reaped = waitpid($pid, 0);
+    die "could not fork to start virtiofsd\n" if $reaped == $pid && $? != 0;
+
+    # return socket to keep it alive,
+    # so that qemu will wait for virtiofsd to start
+    return $socket;
+}
+
 sub check_rng_source {
     my ($source) = @_;
 
@@ -5835,6 +6003,18 @@ sub vm_start_nolock {
        PVE::Tools::run_fork sub {
            PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", 
%systemd_properties);
 
+           my @virtiofs_sockets;
+           for (my $i = 0; $i < $MAX_VIRTIOFS; $i++) {
+               my $opt = "virtiofs$i";
+
+               next if !$conf->{$opt};
+               my $virtiofs = parse_property_string('pve-qm-virtiofs', 
$conf->{$opt});
+               next if !$virtiofs;
+
+               my $virtiofs_socket = start_virtiofs($vmid, $i, $virtiofs);
+               push @virtiofs_sockets, $virtiofs_socket;
+           }
+
            my $tpmpid;
            if ((my $tpm = $conf->{tpmstate0}) && 
!PVE::QemuConfig->is_template($conf)) {
                # start the TPM emulator so QEMU can connect on start
@@ -5849,6 +6029,11 @@ sub vm_start_nolock {
                }
                die "QEMU exited with code $exitcode\n";
            }
+
+           foreach my $virtiofs_socket (@virtiofs_sockets) {
+               shutdown($virtiofs_socket, 2);
+               close($virtiofs_socket);
+           }
        };
     };
 
diff --git a/PVE/QemuServer/Memory.pm b/PVE/QemuServer/Memory.pm
index f365f2d..647595a 100644
--- a/PVE/QemuServer/Memory.pm
+++ b/PVE/QemuServer/Memory.pm
@@ -367,6 +367,16 @@ sub config {
 
     die "numa needs to be enabled to use hugepages" if $conf->{hugepages} && 
!$conf->{numa};
 
+    my $virtiofs_enabled = 0;
+    for (my $i = 0; $i < PVE::QemuServer::max_virtiofs(); $i++) {
+       my $opt = "virtiofs$i";
+       next if !$conf->{$opt};
+       my $virtiofs = 
PVE::JSONSchema::parse_property_string('pve-qm-virtiofs', $conf->{$opt});
+       if ($virtiofs) {
+           $virtiofs_enabled = 1;
+       }
+    }
+
     if ($conf->{numa}) {
 
        my $numa_totalmemory = undef;
@@ -379,7 +389,8 @@ sub config {
            my $numa_memory = $numa->{memory};
            $numa_totalmemory += $numa_memory;
 
-           my $mem_object = print_mem_object($conf, "ram-node$i", 
$numa_memory);
+           my $memdev = $virtiofs_enabled ? "virtiofs-mem$i" : "ram-node$i";
+           my $mem_object = print_mem_object($conf, $memdev, $numa_memory);
 
            # cpus
            my $cpulists = $numa->{cpus};
@@ -404,7 +415,7 @@ sub config {
            }
 
            push @$cmd, '-object', $mem_object;
-           push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i";
+           push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=$memdev";
        }
 
        die "total memory for NUMA nodes must be equal to vm static memory\n"
@@ -418,13 +429,13 @@ sub config {
                die "host NUMA node$i doesn't exist\n"
                    if !host_numanode_exists($i) && $conf->{hugepages};
 
-               my $mem_object = print_mem_object($conf, "ram-node$i", 
$numa_memory);
-               push @$cmd, '-object', $mem_object;
-
                my $cpus = ($cores * $i);
                $cpus .= "-" . ($cpus + $cores - 1) if $cores > 1;
 
-               push @$cmd, '-numa', 
"node,nodeid=$i,cpus=$cpus,memdev=ram-node$i";
+               my $memdev = $virtiofs_enabled ? "virtiofs-mem$i" : 
"ram-node$i";
+               my $mem_object = print_mem_object($conf, $memdev, $numa_memory);
+               push @$cmd, '-object', $mem_object;
+               push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=$memdev";
            }
        }
     }
@@ -453,6 +464,8 @@ sub print_mem_object {
        my $path = hugepages_mount_path($hugepages_size);
 
        return 
"memory-backend-file,id=$id,size=${size}M,mem-path=$path,share=on,prealloc=yes";
+    } elsif ($id =~ m/^virtiofs-mem/) {
+       return "memory-backend-memfd,id=$id,size=${size}M,share=on";
     } else {
        return "memory-backend-ram,id=$id,size=${size}M";
     }
diff --git a/debian/control b/debian/control
index 49f67b2..f008a9b 100644
--- a/debian/control
+++ b/debian/control
@@ -53,6 +53,7 @@ Depends: dbus,
          socat,
          swtpm,
          swtpm-tools,
+         virtiofsd,
          ${misc:Depends},
          ${perl:Depends},
          ${shlibs:Depends},
-- 
2.39.2



_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel

Reply via email to