With QEMU 4.2 we encountered a problem with Unix sockets and SSH socket
forwarding for drive-mirror: for some reason the socket gets reopened
again and again after it closes. This can be worked around by issuing
'block-job-cancel' instead of 'block-job-complete' when we do not need to
swap the disks back from NBD to their original protocol. This is always
the case when we use drive-mirror for live-migrating a VM.
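
For context, once a mirror job has reached the 'ready' state the two QMP
commands differ roughly like this (sketch only; mon_cmd is the QMP wrapper
already used in PVE::QemuServer, the device name is made up):

    # 'block-job-complete' pivots the guest onto the copy, i.e. the disks
    # are swapped and the VM continues running on the NBD target.
    mon_cmd($vmid, 'block-job-complete', device => 'drive-scsi0');

    # 'block-job-cancel' on a ready job also leaves a consistent copy on
    # the target, but the VM stays on its source image - no swap back from
    # NBD to the original protocol, so the socket is not reopened.
    mon_cmd($vmid, 'block-job-cancel', device => 'drive-scsi0');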

qemu_drive_mirror is used both for migration and for clone_disk, so all in
all we have three cases to handle: the 'skip' case, which skips the
completion of the job; the 'wait' case, which was the default before and
still is when $completion is undefined; and the new 'wait_noswap' case,
which is used for live migration.
If 'wait_noswap' is specified, we issue a 'block-job-cancel' once the block
job is in 'ready' state. This completes the block job without swapping the
disks.
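
Summed up in caller terms, the new parameter ends up being used like this
(sketch only, mirroring the hunks below):

    # migration, phase2: start one mirror job per drive, complete them later
    PVE::QemuServer::qemu_drive_mirror($vmid, $drive, $nbd_uri, $vmid,
        undef, $self->{storage_migration_jobs}, 'skip', undef, $bwlimit);

    # migration, phase3_cleanup: finish all jobs without swapping the disks
    PVE::QemuServer::qemu_drive_mirror_monitor($vmid, undef,
        $self->{storage_migration_jobs}, 'wait_noswap');

    # clone: 'skip' for all but the last drive, 'wait' (old default) for the last
    my $completion = $skipcomplete ? 'skip' : 'wait';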

clone_disk always uses 'block-job-cancel' via the qemu_blockjobs_cancel
sub.

Signed-off-by: Mira Limbeck <m.limb...@proxmox.com>
---
v2:
 - introduced this patch

 PVE/API2/Qemu.pm   |  3 ++-
 PVE/QemuMigrate.pm |  4 ++--
 PVE/QemuServer.pm  | 20 +++++++++++++++-----
 3 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/PVE/API2/Qemu.pm b/PVE/API2/Qemu.pm
index d8d3f3e..21a0eae 100644
--- a/PVE/API2/Qemu.pm
+++ b/PVE/API2/Qemu.pm
@@ -2938,6 +2938,7 @@ __PACKAGE__->register_method({
                    foreach my $opt (keys %$drives) {
                        my $drive = $drives->{$opt};
                        my $skipcomplete = ($total_jobs != $i); # finish after last drive
+                       my $completion = $skipcomplete ? 'skip' : 'wait';
 
                        my $src_sid = PVE::Storage::parse_volume_id($drive->{file});
                        my $storage_list = [ $src_sid ];
@@ -2946,7 +2947,7 @@ __PACKAGE__->register_method({
 
                        my $newdrive = PVE::QemuServer::clone_disk($storecfg, $vmid, $running, $opt, $drive, $snapname,
                                                                   $newid, $storage, $format, $fullclone->{$opt}, $newvollist,
-                                                                  $jobs, $skipcomplete, $oldconf->{agent}, $clonelimit);
+                                                                  $jobs, $completion, $oldconf->{agent}, $clonelimit);
 
                        $newconf->{$opt} = PVE::QemuServer::print_drive($newdrive);
 
diff --git a/PVE/QemuMigrate.pm b/PVE/QemuMigrate.pm
index 44e4c57..10c0ff2 100644
--- a/PVE/QemuMigrate.pm
+++ b/PVE/QemuMigrate.pm
@@ -703,7 +703,7 @@ sub phase2 {
            my $bwlimit = PVE::Storage::get_bandwidth_limit('migration', [$source_sid, $target_sid], $opt_bwlimit);
 
            $self->log('info', "$drive: start migration to $nbd_uri");
-           PVE::QemuServer::qemu_drive_mirror($vmid, $drive, $nbd_uri, $vmid, undef, $self->{storage_migration_jobs}, 1, undef, $bwlimit);
+           PVE::QemuServer::qemu_drive_mirror($vmid, $drive, $nbd_uri, $vmid, undef, $self->{storage_migration_jobs}, 'skip', undef, $bwlimit);
        }
     }
 
@@ -968,7 +968,7 @@ sub phase3_cleanup {
 
     if ($self->{storage_migration}) {
        # finish block-job
-       eval { PVE::QemuServer::qemu_drive_mirror_monitor($vmid, undef, $self->{storage_migration_jobs}); };
+       eval { PVE::QemuServer::qemu_drive_mirror_monitor($vmid, undef, $self->{storage_migration_jobs}, 'wait_noswap'); };
 
        if (my $err = $@) {
            eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $self->{storage_migration_jobs}) };
diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index b2ff515..429ec05 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -6521,7 +6521,7 @@ sub qemu_img_format {
 }
 
 sub qemu_drive_mirror {
-    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $skipcomplete, $qga, $bwlimit) = @_;
+    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit) = @_;
 
     $jobs = {} if !$jobs;
 
@@ -6563,11 +6563,13 @@ sub qemu_drive_mirror {
        die "mirroring error: $err\n";
     }
 
-    qemu_drive_mirror_monitor ($vmid, $vmiddst, $jobs, $skipcomplete, $qga);
+    qemu_drive_mirror_monitor ($vmid, $vmiddst, $jobs, $completion, $qga);
 }
 
 sub qemu_drive_mirror_monitor {
-    my ($vmid, $vmiddst, $jobs, $skipcomplete, $qga) = @_;
+    my ($vmid, $vmiddst, $jobs, $completion, $qga) = @_;
+
+    $completion //= 'wait'; # same semantic as with 'skipcomplete' before
 
     eval {
        my $err_complete = 0;
@@ -6612,7 +6614,7 @@ sub qemu_drive_mirror_monitor {
 
            if ($readycounter == scalar(keys %$jobs)) {
                print "all mirroring jobs are ready \n";
-               last if $skipcomplete; #do the complete later
+               last if $completion eq 'skip'; #do the complete later
 
                if ($vmiddst && $vmiddst != $vmid) {
                    my $agent_running = $qga && qga_check_running($vmid);
@@ -6642,7 +6644,15 @@ sub qemu_drive_mirror_monitor {
                        # try to switch the disk if source and destination are on the same guest
                        print "$job: Completing block job...\n";
 
-                       eval { mon_cmd($vmid, "block-job-complete", device => $job) };
+                       my $op;
+                       if ($completion eq 'wait') {
+                           $op = 'block-job-complete';
+                       } elsif ($completion eq 'wait_noswap') {
+                           $op = 'block-job-cancel';
+                       } else {
+                           die "invalid completion value: $completion\n";
+                       }
+                       eval { mon_cmd($vmid, $op, device => $job) };
                        if ($@ =~ m/cannot be completed/) {
                            print "$job: Block job cannot be completed, try 
again.\n";
                            $err_complete++;
-- 
2.20.1

