> Alexandre Derumier via pve-devel <pve-devel@lists.proxmox.com> hat am 
> 11.03.2025 11:28 CET geschrieben:

some sort of description here would be great ;)

> ---
>  src/PVE/Storage.pm           |   4 +-
>  src/PVE/Storage/DirPlugin.pm |   1 +
>  src/PVE/Storage/Plugin.pm    | 232 +++++++++++++++++++++++++++++------
>  3 files changed, 196 insertions(+), 41 deletions(-)
> 
> diff --git a/src/PVE/Storage.pm b/src/PVE/Storage.pm
> index 3b4f041..79e5c3a 100755
> --- a/src/PVE/Storage.pm
> +++ b/src/PVE/Storage.pm
> @@ -1002,7 +1002,7 @@ sub unmap_volume {
>  }
>  
>  sub vdisk_alloc {
> -    my ($cfg, $storeid, $vmid, $fmt, $name, $size) = @_;
> +    my ($cfg, $storeid, $vmid, $fmt, $name, $size, $backing) = @_;
>  
>      die "no storage ID specified\n" if !$storeid;
>  
> @@ -1025,7 +1025,7 @@ sub vdisk_alloc {
>      # lock shared storage
>      return $plugin->cluster_lock_storage($storeid, $scfg->{shared}, undef, 
> sub {
>       my $old_umask = umask(umask|0037);
> -     my $volname = eval { $plugin->alloc_image($storeid, $scfg, $vmid, $fmt, 
> $name, $size) };
> +     my $volname = eval { $plugin->alloc_image($storeid, $scfg, $vmid, $fmt, 
> $name, $size, $backing) };
>       my $err = $@;
>       umask $old_umask;
>       die $err if $err;
> diff --git a/src/PVE/Storage/DirPlugin.pm b/src/PVE/Storage/DirPlugin.pm
> index fb23e0a..1cd7ac3 100644
> --- a/src/PVE/Storage/DirPlugin.pm
> +++ b/src/PVE/Storage/DirPlugin.pm
> @@ -81,6 +81,7 @@ sub options {
>       is_mountpoint => { optional => 1 },
>       bwlimit => { optional => 1 },
>       preallocation => { optional => 1 },
> +     snapext => { optional => 1 },
>     };
>  }
>  
> diff --git a/src/PVE/Storage/Plugin.pm b/src/PVE/Storage/Plugin.pm
> index 65cf43f..d7f485f 100644
> --- a/src/PVE/Storage/Plugin.pm
> +++ b/src/PVE/Storage/Plugin.pm
> @@ -216,6 +216,11 @@ my $defaultData = {
>           maximum => 65535,
>           optional => 1,
>       },
> +        'snapext' => {
> +         type => 'boolean',
> +         description => 'enable external snapshot.',
> +         optional => 1,
> +        },
>      },
>  };
>  
> @@ -716,7 +721,11 @@ sub filesystem_path {
>  
>      my $dir = $class->get_subdir($scfg, $vtype);
>  
> -    $dir .= "/$vmid" if $vtype eq 'images';
> +    if ($scfg->{snapext} && $snapname) {
> +     $name = $class->get_snap_volname($volname, $snapname);
> +    } else {
> +     $dir .= "/$vmid" if $vtype eq 'images';
> +    }

this is a bit weird, as it mixes volnames (with the `$vmid/` prefix) and names 
(without), it's only called twice in this patch, and this here already has 
$volname parsed, so could we maybe let get_snap_volname take and return the 
$name part without the dir?

>  
>      my $path = "$dir/$name";
>  
> @@ -873,7 +882,7 @@ sub clone_image {
>  }
>  
>  sub alloc_image {
> -    my ($class, $storeid, $scfg, $vmid, $fmt, $name, $size) = @_;
> +    my ($class, $storeid, $scfg, $vmid, $fmt, $name, $size, $backing) = @_;

this extends the storage API, so it should actually do that.. and probably 
$backing should not be an arbitrary path, but something that is resolved 
locally?

>  
>      my $imagedir = $class->get_subdir($scfg, 'images');
>      $imagedir .= "/$vmid";
> @@ -901,17 +910,11 @@ sub alloc_image {
>       umask $old_umask;
>       die $err if $err;
>      } else {
> -     my $cmd = ['/usr/bin/qemu-img', 'create'];
> -
> -     my $prealloc_opt = preallocation_cmd_option($scfg, $fmt);
> -     push @$cmd, '-o', $prealloc_opt if defined($prealloc_opt);
>  
> -     push @$cmd, '-f', $fmt, $path, "${size}K";
> -
> -     eval { run_command($cmd, errmsg => "unable to create image"); };
> +     eval { qemu_img_create($scfg, $fmt, $size, $path, $backing) };
>       if ($@) {
>           unlink $path;
> -         rmdir $imagedir;
> +         rmdir $imagedir if !$backing;

don't think this is needed, rmdir will fail if the dir isn't empty anyway..

>           die "$@";
>       }
>      }
> @@ -955,6 +958,50 @@ sub free_image {
>  # TODO taken from PVE/QemuServer/Drive.pm, avoiding duplication would be nice
>  my @checked_qemu_img_formats = qw(raw cow qcow qcow2 qed vmdk cloop);
>  
> +sub qemu_img_create {
> +    my ($scfg, $fmt, $size, $path, $backing) = @_;
> +
> +    my $cmd = ['/usr/bin/qemu-img', 'create'];
> +
> +    my $options = [];
> +
> +    if($backing) {
> +     push @$cmd, '-b', $backing, '-F', 'qcow2';
> +     push @$options, 'extended_l2=on','cluster_size=128k';
> +    };
> +    push @$options, preallocation_cmd_option($scfg, $fmt);
> +    push @$cmd, '-o', join(',', @$options) if @$options > 0;
> +    push @$cmd, '-f', $fmt, $path;
> +    push @$cmd, "${size}K" if !$backing;

is this because it will automatically take the size from the backing image?

> +
> +    run_command($cmd, errmsg => "unable to create image");
> +}
> +
> +sub qemu_img_info {
> +    my ($filename, $file_format, $timeout, $follow_backing_files) = @_;
> +
> +    my $cmd = ['/usr/bin/qemu-img', 'info', '--output=json', $filename];
> +    push $cmd->@*, '-f', $file_format if $file_format;
> +    push $cmd->@*, '--backing-chain' if $follow_backing_files;
> +
> +    my $json = '';
> +    my $err_output = '';
> +    eval {
> +        run_command($cmd,
> +            timeout => $timeout,
> +            outfunc => sub { $json .= shift },
> +            errfunc => sub { $err_output .= shift . "\n"},
> +        );
> +    };
> +    warn $@ if $@;
> +    if ($err_output) {
> +        # if qemu did not output anything to stdout we die with stderr as an 
> error
> +        die $err_output if !$json;
> +        # otherwise we warn about it and try to parse the json
> +        warn $err_output;
> +    }
> +    return $json;
> +}
>  # set $untrusted if the file in question might be malicious since it isn't
>  # created by our stack
>  # this makes certain checks fatal, and adds extra checks for known problems 
> like
> @@ -1018,25 +1065,9 @@ sub file_size_info {
>       warn "file_size_info: '$filename': falling back to 'raw' from unknown 
> format '$file_format'\n";
>       $file_format = 'raw';
>      }
> -    my $cmd = ['/usr/bin/qemu-img', 'info', '--output=json', $filename];
> -    push $cmd->@*, '-f', $file_format if $file_format;
>  
> -    my $json = '';
> -    my $err_output = '';
> -    eval {
> -     run_command($cmd,
> -         timeout => $timeout,
> -         outfunc => sub { $json .= shift },
> -         errfunc => sub { $err_output .= shift . "\n"},
> -     );
> -    };
> -    warn $@ if $@;
> -    if ($err_output) {
> -     # if qemu did not output anything to stdout we die with stderr as an 
> error
> -     die $err_output if !$json;
> -     # otherwise we warn about it and try to parse the json
> -     warn $err_output;
> -    }
> +    my $json = qemu_img_info($filename, $file_format, $timeout);
> +
>      if (!$json) {
>       die "failed to query file information with qemu-img\n" if $untrusted;
>       # skip decoding if there was no output, e.g. if there was a timeout.
> @@ -1162,11 +1193,29 @@ sub volume_snapshot {
>  
>      die "can't snapshot this image format\n" if $volname !~ 
> m/\.(qcow2|qed)$/;
>  
> -    my $path = $class->filesystem_path($scfg, $volname);
> +    if($scfg->{snapext}) {
> +
> +     my $path = $class->path($scfg, $volname, $storeid);
> +     my $snappath = $class->path($scfg, $volname, $storeid, $snap);
> +     #rename current volume to snap volume
> +     die "snapshot volume $snappath already exist\n" if -e $snappath;
> +     rename($path, $snappath) if -e $path;

this is still looking weird.. I don't think it makes sense interface wise to 
allow snapshotting a volume that doesn't even exist..

> +
> +     my ($vtype, $name, $vmid, undef, undef, $isBase, $format) =
> +         $class->parse_volname($volname);
> +
> +     $class->alloc_image($storeid, $scfg, $vmid, 'qcow2', $name, undef, 
> $snappath);
> +     if ($@) {
> +         eval { $class->free_image($storeid, $scfg, $volname, 0) };
> +         warn $@ if $@;

missing cleanup - this should undo the rename from above

> +     }
>  
> -    my $cmd = ['/usr/bin/qemu-img', 'snapshot','-c', $snap, $path];
> +    } else {
>  
> -    run_command($cmd);
> +     my $path = $class->filesystem_path($scfg, $volname);
> +     my $cmd = ['/usr/bin/qemu-img', 'snapshot','-c', $snap, $path];
> +     run_command($cmd);
> +    }
>  
>      return undef;
>  }
> @@ -1177,6 +1226,21 @@ sub volume_snapshot {
>  sub volume_rollback_is_possible {
>      my ($class, $scfg, $storeid, $volname, $snap, $blockers) = @_;
>  
> +    if ($scfg->{snapext}) {
> +     #technically, we could manage multibranch, we it need lot more work for 
> snapshot delete
> +     #we need to implemente block-stream from deleted snapshot to all others 
> child branchs
> +     #when online, we need to do a transaction for multiple disk when delete 
> the last snapshot
> +     #and need to merge in current running file
> +
> +     my $snappath = $class->path($scfg, $volname, $storeid, $snap);
> +     my $snapshots = $class->volume_snapshot_info($scfg, $storeid, $volname);
> +     my $parentsnap = $snapshots->{current}->{parent};

wouldn't it be enough to check that this equals $snap?

> +
> +     return 1 if $snapshots->{$parentsnap}->{file} eq $snappath;
> +
> +     die "can't rollback, '$snap' is not most recent snapshot on 
> '$volname'\n";
> +    }
> +
>      return 1;
>  }
>  
> @@ -1187,9 +1251,15 @@ sub volume_snapshot_rollback {
>  
>      my $path = $class->filesystem_path($scfg, $volname);
>  
> -    my $cmd = ['/usr/bin/qemu-img', 'snapshot','-a', $snap, $path];
> -
> -    run_command($cmd);
> +    if ($scfg->{snapext}) {
> +     #simply delete the current snapshot and recreate it
> +     my $path = $class->filesystem_path($scfg, $volname);
> +     unlink($path);
> +     $class->volume_snapshot($scfg, $storeid, $volname, $snap);

instead of volume_snapshot, this could simply call alloc_image with the backing 
file? then volume_snapshot could always rename and always cleanup properly..

> +    } else {
> +     my $cmd = ['/usr/bin/qemu-img', 'snapshot','-a', $snap, $path];
> +     run_command($cmd);
> +    }
>  
>      return undef;
>  }
> @@ -1201,13 +1271,49 @@ sub volume_snapshot_delete {
>  
>      return 1 if $running;
>  
> +    my $cmd = "";
>      my $path = $class->filesystem_path($scfg, $volname);
>  
> -    $class->deactivate_volume($storeid, $scfg, $volname, $snap, {});
> +    if ($scfg->{snapext}) {
> +
> +     my $snapshots = $class->volume_snapshot_info($scfg, $storeid, $volname);
> +     my $snappath = $snapshots->{$snap}->{file};
> +     die "volume $snappath is missing" if !-e $snappath;
>  
> -    my $cmd = ['/usr/bin/qemu-img', 'snapshot','-d', $snap, $path];
> +     my $parentsnap = $snapshots->{$snap}->{parent};
> +     my $childsnap = $snapshots->{$snap}->{child};
>  
> -    run_command($cmd);
> +     my $parentpath = $snapshots->{$parentsnap}->{file} if $parentsnap;
> +     my $childpath = $snapshots->{$childsnap}->{file} if $childsnap;

my $foo = .. if ...; 

is forbidden in our code ;) (perlsyn documents the behaviour of `my` combined 
with a statement modifier as undefined) but I think we always need to have a 
childsnap anyway, right?

so we could simply check for that, and then switch around the two branches 
below so that one of them can do

if (my $parentsnap = ...) {
...
} else {
...
}

> +
> +     #if first snapshot,as it should be bigger,  we merge child, and rename 
> the snapshot to child
> +     if(!$parentsnap) {
> +         print"commit $childpath\n";
> +         $cmd = ['/usr/bin/qemu-img', 'commit', $childpath];

we could provide `-d` here to skip emptying $childpath since we rename over it 
anyway below..

> +         eval { run_command($cmd) };
> +         if ($@) {
> +             die "error commiting $childpath to $parentpath; $@\n";

this is wrong, there is no $parentpath.. we are committing into $snappath

> +         }
> +         print"rename $snappath to $childpath\n";
> +         rename($snappath, $childpath);

what if this fails?

> +     } else {
> +         #we rebase the child image on the parent as new backing image

should we extend this to make it clear what this means? it means copying any 
parts of $snap that are not in $parent and not yet overwritten by $child into 
$child, right?

so how expensive this is depends on:
- how many changes are between $parent and $snap (increases cost)
- how many of those are overwritten by changes between $snap and $child 
(decreases cost)

> +         die "missing parentsnap snapshot to rebase child $childpath\n" if 
> !$parentpath;

how can this happen? if there is a parentsnap there must be a parentpath as 
well?

> +         $cmd = ['/usr/bin/qemu-img', 'rebase', '-b', $parentpath, '-F', 
> 'qcow2', '-f', 'qcow2', $childpath];
> +         eval { run_command($cmd) };
> +         if ($@) {
> +             die "error rebase $childpath from $parentpath; $@\n";
> +         }
> +         #delete the snapshot
> +         unlink($snappath);
> +     }
> +
> +    } else {
> +     $class->deactivate_volume($storeid, $scfg, $volname, $snap, {});
> +
> +     $cmd = ['/usr/bin/qemu-img', 'snapshot','-d', $snap, $path];
> +     run_command($cmd);
> +    }
>  
>      return undef;
>  }
> @@ -1246,7 +1352,7 @@ sub volume_has_feature {
>           current => { qcow2 => 1, raw => 1, vmdk => 1 },
>       },
>       rename => {
> -         current => {qcow2 => 1, raw => 1, vmdk => 1},
> +         current => { qcow2 => 1, raw => 1, vmdk => 1},
>       },
>      };
>  
> @@ -1481,7 +1587,37 @@ sub status {
>  sub volume_snapshot_info {
>      my ($class, $scfg, $storeid, $volname) = @_;
>  
> -    die "volume_snapshot_info is not implemented for $class";
> +    my $path = $class->filesystem_path($scfg, $volname);
> +
> +    my $backing_chain = 1;
> +    my $json = qemu_img_info($path, undef, 10, $backing_chain);
> +    die "failed to query file information with qemu-img\n" if !$json;
> +    my $snapshots = eval { decode_json($json) };

missing error handling for json decoding..

> +
> +    my $info = {};
> +    my $order = 0;
> +    for my $snap (@$snapshots) {
> +
> +     my $snapfile = $snap->{filename};
> +     my $snapname = parse_snapname($snapfile);
> +     $snapname = 'current' if !$snapname;
> +     my $snapvolname = $class->get_snap_volname($volname, $snapname);
> +
> +     $info->{$snapname}->{order} = $order;
> +     $info->{$snapname}->{file}= $snapfile;
> +     $info->{$snapname}->{volname} = $snapvolname;
> +     $info->{$snapname}->{volid} = "$storeid:$snapvolname";
> +     $info->{$snapname}->{ext} = 1;
> +
> +     my $parentfile = $snap->{'backing-filename'};
> +     if ($parentfile) {
> +         my $parentname = parse_snapname($parentfile);
> +         $info->{$snapname}->{parent} = $parentname;
> +         $info->{$parentname}->{child} = $snapname;
> +     }
> +     $order++;
> +    }
> +    return $info;
>  }
>  
>  sub activate_storage {
> @@ -1867,4 +2003,22 @@ sub config_aware_base_mkdir {
>      }
>  }
>  
> +sub get_snap_volname {
> +    my ($class, $volname, $snapname) = @_;
> +
> +    my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) = 
> $class->parse_volname($volname);
> +    $name = !$snapname || $snapname eq 'current' ? $volname : 
> "$vmid/snap-$snapname-$name";

other way round would be better to group by volume first IMHO 
($vmid/snap-$name-$snapname), as this is similar to how we encode snapshots 
often on the storage level (volume@snap). we also need to have some delimiter 
between snapshot and volume name that is not allowed in either (hard for 
volname since basically everything but '/' goes, but snapshots have a 
restricted character set (configid, which means alphanumeric, hyphen and 
underscore), so we could use something like '.' as delimiter? or we switch to 
directories and do $vmid/snap/$snap/$name?)

> +    return $name;
> +}
> +
> +sub parse_snapname {
> +    my ($name) = @_;
> +
> +    my $basename = basename($name);
> +    if ($basename =~ m/^snap-(.*)-vm(.*)$/) {

this is not strict enough, see above

> +     return $1;
> +    }
> +    return undef;
> +}
> +
>  1;
> -- 
> 2.39.5


_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel

Reply via email to