package PVE::QemuMigrateExternal;

# Migration of a QEMU VM to a node of an *external* cluster via SSH.
#
# Build note: the patch that introduced this module also adds
# QemuMigrateExternal.pm to PERLSOURCE in PVE/Makefile.
#
# The phases (prepare, phase1, phase2, phase3 and their *_cleanup
# counterparts, final_cleanup) are hooks; they are presumably driven by
# PVE::AbstractMigrate, the base class of this package - confirm there.

use strict;
use warnings;
use PVE::AbstractMigrate;
use IO::File;
use IPC::Open2;
use POSIX qw( WNOHANG );
use PVE::INotify;
use PVE::Tools;
use PVE::Cluster;
use PVE::Storage;
use PVE::QemuServer;
use Time::HiRes qw( usleep );
use PVE::RPCEnvironment;
use PVE::ReplicationConfig;
use PVE::ReplicationState;
use PVE::Replication;

use base qw(PVE::AbstractMigrate);

# Spawn $cmd (array ref) with open2() and return a hash ref
# { writer, reader, pid } connected to the child's stdin/stdout.
# Dies with the open2 error if the command could not be started.
sub fork_command_pipe {
    my ($self, $cmd) = @_;

    my $reader = IO::File->new();
    my $writer = IO::File->new();

    my $orig_pid = $$;

    my $cpid;

    eval { $cpid = open2($reader, $writer, @$cmd); };

    my $err = $@;

    # catch exec errors: if we get here with a different PID we are the
    # forked child whose exec failed - never return into the caller's code
    if ($orig_pid != $$) {
        $self->log('err', "can't fork command pipe\n");
        POSIX::_exit(1);
        kill('KILL', $$);
    }

    die $err if $err;

    return { writer => $writer, reader => $reader, pid => $cpid };
}

# Close both pipe ends of $cmdpipe and reap the child process.
# Waits up to $timeout seconds (if given) for a voluntary exit, then sends
# SIGTERM (waiting up to 10 more seconds) and finally SIGKILL.
# Once the child is collected, the 'pid' entry is removed so that a repeated
# call becomes a no-op instead of signalling a PID that was already reaped.
sub finish_command_pipe {
    my ($self, $cmdpipe, $timeout) = @_;

    my $cpid = $cmdpipe->{pid};
    return if !defined($cpid);

    my $writer = $cmdpipe->{writer};
    my $reader = $cmdpipe->{reader};

    $writer->close();
    $reader->close();

    my $collect_child_process = sub {
        my $res = waitpid($cpid, WNOHANG);
        if (defined($res) && ($res == $cpid)) {
            # the hash key is 'pid' (see fork_command_pipe)
            delete $cmdpipe->{pid};
            return 1;
        } else {
            return 0;
        }
    };

    if ($timeout) {
        for (my $i = 0; $i < $timeout; $i++) {
            return if $collect_child_process->();
            sleep(1);
        }
    }

    $self->log('info', "ssh tunnel still running - terminating now with SIGTERM\n");
    kill(15, $cpid);

    # wait again
    for (my $i = 0; $i < 10; $i++) {
        return if $collect_child_process->();
        sleep(1);
    }

    $self->log('info', "ssh tunnel still running - terminating now with SIGKILL\n");
    kill 9, $cpid;
    sleep 1;

    $self->log('err', "ssh tunnel child process (PID $cpid) couldn't be collected\n")
        if !$collect_child_process->();
}

# Read one line from the tunnel's reader handle, waiting at most $timeout
# seconds (default 60). Returns the line without trailing newline, or undef
# if the handle returned nothing (EOF). Dies if the read fails or times out.
sub read_tunnel {
    my ($self, $tunnel, $timeout) = @_;

    $timeout = 60 if !defined($timeout);

    my $reader = $tunnel->{reader};

    my $output;
    eval {
        PVE::Tools::run_with_timeout($timeout, sub { $output = <$reader>; });
    };
    die "reading from tunnel failed: $@\n" if $@;

    # <$reader> yields undef on EOF; do not chomp an undefined value
    chomp $output if defined($output);

    return $output;
}

# Send "$command\n" through the tunnel (timeout default 60 seconds).
# For tunnels with protocol version >= 1 a reply is read (10 second timeout)
# and anything other than 'OK' is turned into an exception.
sub write_tunnel {
    my ($self, $tunnel, $timeout, $command) = @_;

    $timeout = 60 if !defined($timeout);

    my $writer = $tunnel->{writer};

    eval {
        PVE::Tools::run_with_timeout($timeout, sub {
            print $writer "$command\n";
            $writer->flush();
        });
    };
    die "writing to tunnel failed: $@\n" if $@;

    if ($tunnel->{version} && $tunnel->{version} >= 1) {
        my $res = eval { $self->read_tunnel($tunnel, 10); };
        die "no reply to command '$command': $@\n" if $@;

        if ($res eq 'OK') {
            return;
        } else {
            die "tunnel replied '$res' to command '$command'\n";
        }
    }
}

# Start 'qm mtunnel' on the remote side over ssh, optionally with a local
# forward ('-L $tunnel_addr'), and perform the greeting/version handshake.
# Returns the tunnel hash (see fork_command_pipe) with 'version' set.
# On any handshake error the child is reaped and an exception is thrown.
sub fork_tunnel {
    my ($self, $tunnel_addr) = @_;

    my @localtunnelinfo = defined($tunnel_addr) ? ('-L' , $tunnel_addr ) : ();

    my $cmd = [@{$self->{rem_ssh}}, '-o ExitOnForwardFailure=yes', @localtunnelinfo, '/usr/sbin/qm', 'mtunnel' ];

    my $tunnel = $self->fork_command_pipe($cmd);

    eval {
        my $helo = $self->read_tunnel($tunnel, 60);
        die "no reply\n" if !$helo;
        die "no quorum on target node\n" if $helo =~ m/^no quorum$/;
        die "got strange reply from mtunnel ('$helo')\n"
            if $helo !~ m/^tunnel online$/;
    };
    my $err = $@;

    # the version line is read even if the greeting failed; a read error here
    # is swallowed by the eval, a malformed line only sets $err if none is set
    eval {
        my $ver = $self->read_tunnel($tunnel, 10);
        if ($ver =~ /^ver (\d+)$/) {
            $tunnel->{version} = $1;
            $self->log('info', "ssh tunnel $ver\n");
        } else {
            $err = "received invalid tunnel version string '$ver'\n" if !$err;
        }
    };

    if ($err) {
        $self->finish_command_pipe($tunnel);
        die "can't open migration tunnel - $err";
    }
    return $tunnel;
}

# Send 'quit' through the tunnel, reap the ssh child (30 second grace period)
# and remove a forwarded unix socket on both sides. An error from the 'quit'
# command is re-thrown only after the cleanup was done.
sub finish_tunnel {
    my ($self, $tunnel) = @_;

    eval { $self->write_tunnel($tunnel, 30, 'quit'); };
    my $err = $@;

    $self->finish_command_pipe($tunnel, 30);

    if ($tunnel->{sock_addr}) {
        # ssh does not clean up on local host
        my $cmd = ['rm', '-f', $tunnel->{sock_addr}];
        PVE::Tools::run_command($cmd);

        # .. and just to be sure check on remote side
        unshift @{$cmd}, @{$self->{rem_ssh}};
        PVE::Tools::run_command($cmd);
    }

    die $err if $err;
}

# Run $code (with @param) while holding the config lock of VM $vmid.
sub lock_vm {
    my ($self, $vmid, $code, @param) = @_;

    return PVE::QemuConfig->lock_config($vmid, $code, @param);
}

# Validate the source VM, verify ssh connectivity and create the (locked)
# target VM on the remote side with 'qm create'.
# Side effects: sets $self->{storecfg}, $self->{vmconf}, possibly
# $self->{forcemachine} and $self->{opts}->{targetvmid}; appends the external
# ssh key option to $self->{rem_ssh}. Always returns 1; dies on any failure.
sub prepare {
    my ($self, $vmid) = @_;

    my $online = $self->{opts}->{online};

    $self->{storecfg} = PVE::Storage::config();

    # test if VM exists
    my $conf = $self->{vmconf} = PVE::QemuConfig->load_config($vmid);

    PVE::QemuConfig->check_lock($conf);

    # NOTE(review): the PID is only used as a "running" flag here and this
    # sub returns 1 unconditionally; confirm the caller does not expect the
    # running state as return value.
    if (PVE::QemuServer::check_running($vmid)) {
        die "can't migrate running VM without --online\n" if !$online;

        $self->{forcemachine} = PVE::QemuServer::qemu_machine_pxe($vmid, $conf);
    }

    if (my $loc_res = PVE::QemuServer::check_local_resources($conf, 1)) {
        if ($self->{running} || !$self->{opts}->{force}) {
            die "can't migrate VM which uses local devices\n";
        } else {
            $self->log('info', "migrating VM which uses local devices");
        }
    }

    # Add the identity file *before* testing the connection, so the test
    # exercises the same credentials as every later remote command, and never
    # pass '-i' without a value.
    if (defined(my $sshkey = $self->{opts}->{migration_external_sshkey})) {
        push @{$self->{rem_ssh}}, '-i', $sshkey;
    }

    # test ssh connection
    my $cmd = [ @{$self->{rem_ssh}}, '/bin/true' ];
    eval { $self->cmd_quiet($cmd); };
    die "Can't connect to destination address using public key\n" if $@;

    if (!$self->{opts}->{targetvmid}) {
        # get remote nextvmid
        eval {
            my $cmd = [@{$self->{rem_ssh}}, 'pvesh', 'get', '/cluster/nextid'];
            PVE::Tools::run_command($cmd, outfunc => sub {
                my $line = shift;
                if ($line =~ m/^(\d+)/) {
                    # store only the captured digits, not the whole line
                    $self->{opts}->{targetvmid} = $1;
                }
            });
        };
        if (my $err = $@) {
            $self->log('err', $err);
            $self->{errors} = 1;
            die $err;
        }

        die "can't find the next free vmid on remote cluster\n" if !$self->{opts}->{targetvmid};
    }

    # create vm
    $cmd = [@{$self->{rem_ssh}}, 'qm', 'create', $self->{opts}->{targetvmid}];

    foreach my $opt (keys %{$conf}) {
        next if $opt =~ m/^(pending|snapshots|digest|parent)/;
        # Drives are added by the foreach_drive() pass below (with the target
        # storage and size instead of the source volume id).
        # NOTE(review): sata/efidisk are skipped here on the assumption that
        # foreach_drive also iterates them - confirm in PVE::QemuServer.
        next if $opt =~ m/^(ide|sata|scsi|virtio|efidisk)(\d+)/;

        # work on a copy: $conf is the source VM config and gets written
        # back to disk in phase1, so it must not receive the target bridge
        my $value = $conf->{$opt};

        if ($opt =~ m/^(net)(\d+)/ && $self->{opts}->{targetbridge}) {
            my $d = PVE::QemuServer::parse_net($value);
            $d->{bridge} = $self->{opts}->{targetbridge};
            $value = PVE::QemuServer::print_net($d);
        }

        die "can't migrate unused disk. please remove it before migrate\n" if $opt =~ m/^(unused)(\d+)/;
        push @$cmd , "-$opt", PVE::Tools::shellquote($value);
    }

    PVE::QemuServer::foreach_drive($conf, sub {
        my ($ds, $drive) = @_;

        if (PVE::QemuServer::drive_is_cdrom($drive, 1)) {
            push @$cmd , "-$ds", PVE::Tools::shellquote($conf->{$ds});
            return;
        }

        my $volid = $drive->{file};
        return if !$volid;

        my ($sid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;
        my $size = PVE::Storage::volume_size_info($self->{storecfg}, $volid, 5);
        die "can't get size\n" if !$size;
        # presumably bytes -> GiB for the "<storage>:<size>" allocation syntax
        $size = $size/1024/1024/1024;
        my $targetsid = $self->{opts}->{targetstorage} ? $self->{opts}->{targetstorage} : $sid;

        my $data = { %$drive };
        delete $data->{$_} for qw(index interface file size);
        my $drive_conf = "$targetsid:$size";
        # sorted only to make the generated command line deterministic
        foreach my $drive_opt (sort keys %{$data}) {
            $drive_conf .= ",$drive_opt=$data->{$drive_opt}";
        }

        push @$cmd , "-$ds", PVE::Tools::shellquote($drive_conf);
    });

    push @$cmd , '-lock', 'migrate';

    eval{ PVE::Tools::run_command($cmd, outfunc => sub {}, errfunc => sub {}) };
    if (my $err = $@) {
        $self->log('err', $err);
        $self->{errors} = 1;
        die $err;
    }

    return 1;
}

# Free every volume recorded in $self->{target_drive} on the remote side
# ('pvesm free'). Failures are logged and flagged in $self->{errors}, but do
# not abort the loop.
sub cleanup_remotedisks {
    my ($self) = @_;

    foreach my $target_drive (keys %{$self->{target_drive}}) {

        my $drive = PVE::QemuServer::parse_drive($target_drive, $self->{target_drive}->{$target_drive}->{volid});
        my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});

        my $cmd = [@{$self->{rem_ssh}}, 'pvesm', 'free', "$storeid:$volname"];

        eval{ PVE::Tools::run_command($cmd, outfunc => sub {}, errfunc => sub {}) };
        if (my $err = $@) {
            $self->log('err', $err);
            $self->{errors} = 1;
        }
    }
}

# Phase 1: mark the source VM as being migrated by writing lock=migrate
# into its config file.
sub phase1 {
    my ($self, $vmid) = @_;

    $self->log('info', "starting migration of VM $vmid to node '$self->{node}' ($self->{nodeip})");

    my $conf = $self->{vmconf};

    # set migrate lock in config file
    $conf->{lock} = 'migrate';
    PVE::QemuConfig->write_config($vmid, $conf);
}

# Undo phase 1: remove the migrate lock from the source config and report
# (but do not remove) volumes listed in $self->{volumes}.
sub phase1_cleanup {
    my ($self, $vmid, $err) = @_;

    $self->log('info', "aborting phase 1 - cleanup resources");

    my $conf = $self->{vmconf};
    delete $conf->{lock};
    eval { PVE::QemuConfig->write_config($vmid, $conf) };
    if (my $write_err = $@) {
        $self->log('err', $write_err);
    }

    if ($self->{volumes}) {
        foreach my $volid (@{$self->{volumes}}) {
            $self->log('err', "found stale volume copy '$volid' on node '$self->{node}'");
            # fixme: try to remove ?
        }
    }
}

# Phase 2: start the target VM remotely, open the ssh tunnel, mirror all
# drives announced by the target via NBD, then run and monitor the live RAM
# migration until QEMU reports 'completed' (or die on failure).
# Sets $self->{tunnel}, $self->{target_drive}, $self->{storage_migration},
# $self->{storage_migration_jobs} and $self->{livemigration}.
sub phase2 {
    my ($self, $vmid) = @_;

    my $conf = $self->{vmconf};

    my $targetvmid = $self->{opts}->{targetvmid} ? $self->{opts}->{targetvmid} : $vmid;

    $self->log('info', "starting VM $targetvmid on remote node '$self->{node}'");

    my $raddr;
    my $rport;
    my $ruri; # the whole migration dst. URI (protocol:address[:port])
    my $nodename = PVE::INotify::nodename();

    ## start on remote node
    my $cmd = [@{$self->{rem_ssh}}];

    my $spice_ticket;
    if (PVE::QemuServer::vga_conf_has_spice($conf->{vga})) {
        my $res = PVE::QemuServer::vm_mon_cmd($vmid, 'query-spice');
        $spice_ticket = $res->{ticket};
    }

    my $migration_type = $self->{opts}->{migration_type};

    push @$cmd , 'qm', 'start', $targetvmid, '--skiplock';

    push @$cmd, '--external_migration';

    push @$cmd, '--migration_type', $migration_type;

    push @$cmd, '--migration_network', $self->{opts}->{migration_network}
        if $self->{opts}->{migration_network};

    push @$cmd, '--stateuri', 'unix';

    if ($self->{forcemachine}) {
        push @$cmd, '--machine', $self->{forcemachine};
    }

    my $spice_port;

    # Note: We try to keep $spice_ticket secret (do not pass via command line parameter)
    # instead we pipe it through STDIN
    PVE::Tools::run_command($cmd, input => $spice_ticket, outfunc => sub {
        my $line = shift;

        if ($line =~ m/^migration listens on tcp:(localhost|[\d\.]+|\[[\d\.:a-fA-F]+\]):(\d+)$/) {
            $raddr = $1;
            $rport = int($2);
            $ruri = "tcp:$raddr:$rport";
        }
        elsif ($line =~ m!^migration listens on unix:(/run/qemu-server/(\d+)\.migrate)$!) {
            $raddr = $1;
            die "Destination UNIX sockets VMID does not match source VMID" if $targetvmid ne $2;
            $ruri = "unix:$raddr";
        }
        elsif ($line =~ m/^migration listens on port (\d+)$/) {
            $raddr = "localhost";
            $rport = int($1);
            $ruri = "tcp:$raddr:$rport";
        }
        elsif ($line =~ m/^spice listens on port (\d+)$/) {
            $spice_port = int($1);
        }
        elsif ($line =~ m/^storage migration listens on nbd:(localhost|[\d\.]+|\[[\d\.:a-fA-F]+\]):(\d+):exportname=(\S+) volume:(\S+)$/) {
            # copy all captures before the s/// below resets them
            my $volid = $4;
            my $nbd_uri = "nbd:$1:$2:exportname=$3";
            my $targetdrive = $3;
            $targetdrive =~ s/drive-//g;

            $self->{target_drive}->{$targetdrive}->{volid} = $volid;
            $self->{target_drive}->{$targetdrive}->{nbd_uri} = $nbd_uri;

        }
    }, errfunc => sub {
        my $line = shift;
        $self->log('info', $line);
    });

    die "unable to detect remote migration address\n" if !$raddr;

    $self->log('info', "start remote tunnel");

    if ($migration_type eq 'secure') {

        if ($ruri =~ /^unix:/) {
            unlink $raddr;
            $self->{tunnel} = $self->fork_tunnel("$raddr:$raddr");
            $self->{tunnel}->{sock_addr} = $raddr;

            my $unix_socket_try = 0; # wait for the socket to become ready
            while (! -S $raddr) {
                $unix_socket_try++;
                if ($unix_socket_try > 100) {
                    $self->{errors} = 1;
                    $self->finish_tunnel($self->{tunnel});
                    die "Timeout, migration socket $ruri did not get ready";
                }

                usleep(50000);
            }

        } elsif ($ruri =~ /^tcp:/) {
            my $tunnel_addr;
            if ($raddr eq "localhost") {
                # for backwards compatibility with older qemu-server versions
                my $pfamily = PVE::Tools::get_host_address_family($nodename);
                my $lport = PVE::Tools::next_migrate_port($pfamily);
                $tunnel_addr = "$lport:localhost:$rport";
            }

            $self->{tunnel} = $self->fork_tunnel($tunnel_addr);

        } else {
            die "unsupported protocol in migration URI: $ruri\n";
        }
    } else {
        #fork tunnel for insecure migration, to send faster commands like resume
        $self->{tunnel} = $self->fork_tunnel();
    }

    my $start = time();

    $self->{storage_migration} = 1;
    $self->{storage_migration_jobs} = {};
    $self->log('info', "starting storage migration");

    foreach my $drive (keys %{$self->{target_drive}}){
        my $nbd_uri = $self->{target_drive}->{$drive}->{nbd_uri};
        $self->log('info', "$drive: start migration to $nbd_uri");
        PVE::QemuServer::qemu_drive_mirror($vmid, $drive, $nbd_uri, $vmid, undef, $self->{storage_migration_jobs}, 1);
    }

    $self->log('info', "starting online/live migration on $ruri");
    $self->{livemigration} = 1;

    # load_defaults
    my $defaults = PVE::QemuServer::load_defaults();

    # always set migrate speed (overwrite kvm default of 32m)
    # we set a very high default of 8192m which is basically unlimited
    my $migrate_speed = $defaults->{migrate_speed} || 8192;
    $migrate_speed = $conf->{migrate_speed} || $migrate_speed;
    $migrate_speed = $migrate_speed * 1048576;
    $self->log('info', "migrate_set_speed: $migrate_speed");
    eval {
        PVE::QemuServer::vm_mon_cmd_nocheck($vmid, "migrate_set_speed", value => int($migrate_speed));
    };
    $self->log('info', "migrate_set_speed error: $@") if $@;

    my $migrate_downtime = $defaults->{migrate_downtime};
    $migrate_downtime = $conf->{migrate_downtime} if defined($conf->{migrate_downtime});
    if (defined($migrate_downtime)) {
        $self->log('info', "migrate_set_downtime: $migrate_downtime");
        eval {
            PVE::QemuServer::vm_mon_cmd_nocheck($vmid, "migrate_set_downtime", value => int($migrate_downtime*100)/100);
        };
        $self->log('info', "migrate_set_downtime error: $@") if $@;
    }

    $self->log('info', "set migration_caps");
    eval {
        PVE::QemuServer::set_migration_caps($vmid);
    };
    warn $@ if $@;

    # set cachesize to 10% of the total memory
    my $memory = $conf->{memory} || $defaults->{memory};
    my $cachesize = int($memory * 1048576 / 10);
    $cachesize = round_powerof2($cachesize);

    $self->log('info', "set cachesize: $cachesize");
    eval {
        PVE::QemuServer::vm_mon_cmd_nocheck($vmid, "migrate-set-cache-size", value => int($cachesize));
    };
    $self->log('info', "migrate-set-cache-size error: $@") if $@;

    if (PVE::QemuServer::vga_conf_has_spice($conf->{vga})) {
        my $rpcenv = PVE::RPCEnvironment::get();
        my $authuser = $rpcenv->get_user();

        # NOTE(review): PVE::AccessControl is not imported by this module;
        # presumably it is loaded elsewhere - confirm.
        my (undef, $proxyticket) = PVE::AccessControl::assemble_spice_ticket($authuser, $vmid, $self->{node});

        my $filename = "/etc/pve/nodes/$self->{node}/pve-ssl.pem";
        my $subject = PVE::AccessControl::read_x509_subject_spice($filename);

        $self->log('info', "spice client_migrate_info");

        eval {
            PVE::QemuServer::vm_mon_cmd_nocheck($vmid, "client_migrate_info", protocol => 'spice',
                                                hostname => $proxyticket, 'tls-port' => $spice_port,
                                                'cert-subject' => $subject);
        };
        $self->log('info', "client_migrate_info error: $@") if $@;

    }

    $self->log('info', "start migrate command to $ruri");
    eval {
        PVE::QemuServer::vm_mon_cmd_nocheck($vmid, "migrate", uri => $ruri);
    };
    my $merr = $@;
    $self->log('info', "migrate uri => $ruri failed: $merr") if $merr;

    my $lstat = 0;           # 'transferred' value seen in the previous poll
    my $usleep = 1000000;    # poll interval in microseconds
    my $i = 0;
    my $err_count = 0;
    my $lastrem = undef;
    my $downtimecounter = 0;
    while (1) {
        $i++;
        # plain ternary instead of "my ... if ...", whose behaviour is undefined
        my $avglstat = $lstat ? $lstat / $i : undef;

        usleep($usleep);
        my $stat;
        eval {
            $stat = PVE::QemuServer::vm_mon_cmd_nocheck($vmid, "query-migrate");
        };
        if (my $err = $@) {
            $err_count++;
            warn "query migrate failed: $err\n";
            $self->log('info', "query migrate failed: $err");
            if ($err_count <= 5) {
                usleep(1000000);
                next;
            }
            die "too many query migrate failures - aborting\n";
        }

        if (defined($stat->{status}) && $stat->{status} =~ m/^(setup)$/im) {
            sleep(1);
            next;
        }

        if (defined($stat->{status}) && $stat->{status} =~ m/^(active|completed|failed|cancelled)$/im) {
            $merr = undef;
            $err_count = 0;
            if ($stat->{status} eq 'completed') {
                my $delay = time() - $start;
                if ($delay > 0) {
                    my $mbps = sprintf "%.2f", $memory / $delay;
                    my $downtime = $stat->{downtime} || 0;
                    $self->log('info', "migration speed: $mbps MB/s - downtime $downtime ms");
                }
            }

            if ($stat->{status} eq 'failed' || $stat->{status} eq 'cancelled') {
                $self->log('info', "migration status error: $stat->{status}");
                die "aborting\n"
            }

            if ($stat->{status} ne 'active') {
                $self->log('info', "migration status: $stat->{status}");
                last;
            }

            if ($stat->{ram}->{transferred} ne $lstat) {
                my $trans = $stat->{ram}->{transferred} || 0;
                my $rem = $stat->{ram}->{remaining} || 0;
                my $total = $stat->{ram}->{total} || 0;
                my $xbzrlecachesize = $stat->{"xbzrle-cache"}->{"cache-size"} || 0;
                my $xbzrlebytes = $stat->{"xbzrle-cache"}->{"bytes"} || 0;
                my $xbzrlepages = $stat->{"xbzrle-cache"}->{"pages"} || 0;
                my $xbzrlecachemiss = $stat->{"xbzrle-cache"}->{"cache-miss"} || 0;
                my $xbzrleoverflow = $stat->{"xbzrle-cache"}->{"overflow"} || 0;
                # reduce sleep if remaining memory is lower than the average transfer speed
                $usleep = 100000 if $avglstat && $rem < $avglstat;

                $self->log('info', "migration status: $stat->{status} (transferred ${trans}, " .
                           "remaining ${rem}), total ${total})");

                if (${xbzrlecachesize}) {
                    $self->log('info', "migration xbzrle cachesize: ${xbzrlecachesize} transferred ${xbzrlebytes} pages ${xbzrlepages} cachemiss ${xbzrlecachemiss} overflow ${xbzrleoverflow}");
                }

                # count polls where the remaining amount grew or reached zero
                if (($lastrem && $rem > $lastrem ) || ($rem == 0)) {
                    $downtimecounter++;
                }
                $lastrem = $rem;

                # after more than 5 such polls, double the allowed downtime
                # NOTE(review): $migrate_downtime may be undefined here if no
                # default is configured - confirm load_defaults always sets it
                if ($downtimecounter > 5) {
                    $downtimecounter = 0;
                    $migrate_downtime *= 2;
                    $self->log('info', "migrate_set_downtime: $migrate_downtime");
                    eval {
                        PVE::QemuServer::vm_mon_cmd_nocheck($vmid, "migrate_set_downtime", value => int($migrate_downtime*100)/100);
                    };
                    $self->log('info', "migrate_set_downtime error: $@") if $@;
                }

            }


            $lstat = $stat->{ram}->{transferred};

        } else {
            die $merr if $merr;
            die "unable to parse migration status '$stat->{status}' - aborting\n";
        }
    }
}

# Undo phase 2 after an error (no-op unless $self->{errors} is set):
# cancel the RAM migration and the block jobs, drop the source lock, stop and
# destroy the target VM and close the tunnel. Sets $self->{phase2errors}.
sub phase2_cleanup {
    my ($self, $vmid, $err) = @_;

    return if !$self->{errors};

    my $targetvmid = $self->{opts}->{targetvmid} ? $self->{opts}->{targetvmid} : $vmid;

    $self->{phase2errors} = 1;

    $self->log('info', "aborting phase 2 - cleanup resources");

    $self->log('info', "migrate_cancel");
    eval {
        PVE::QemuServer::vm_mon_cmd_nocheck($vmid, "migrate_cancel");
    };
    $self->log('info', "migrate_cancel error: $@") if $@;

    my $conf = $self->{vmconf};
    delete $conf->{lock};
    eval { PVE::QemuConfig->write_config($vmid, $conf) };
    if (my $err = $@) {
        $self->log('err', $err);
    }

    # cleanup resources on target host
    if ($self->{storage_migration}) {

        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $self->{storage_migration_jobs}) };
        if (my $err = $@) {
            $self->log('err', $err);
        }
    }

    my $cmd = [@{$self->{rem_ssh}}, 'qm', 'stop', $targetvmid, '--skiplock'];

    eval{ PVE::Tools::run_command($cmd, outfunc => sub {}, errfunc => sub {}) };
    if (my $err = $@) {
        $self->log('err', $err);
        $self->{errors} = 1;
    }

    $cmd = [@{$self->{rem_ssh}}, 'qm', 'destroy', $targetvmid, '--skiplock'];

    eval{ PVE::Tools::run_command($cmd, outfunc => sub {}, errfunc => sub {}) };
    if (my $err = $@) {
        $self->log('err', $err);
        $self->{errors} = 1;
    }

    if ($self->{tunnel}) {
        eval { finish_tunnel($self, $self->{tunnel}); };
        if (my $err = $@) {
            $self->log('err', $err);
            $self->{errors} = 1;
        }
    }
}

# Phase 3: free the local volumes listed in $self->{volumes}
# (skipped entirely if phase 2 failed).
sub phase3 {
    my ($self, $vmid) = @_;

    my $volids = $self->{volumes};
    return if $self->{phase2errors};

    # destroy local copies
    foreach my $volid (@$volids) {
        eval { PVE::Storage::vdisk_free($self->{storecfg}, $volid); };
        if (my $err = $@) {
            $self->log('err', "removing local copy of '$volid' failed - $err");
            $self->{errors} = 1;
            last if $err =~ /^interrupted by signal$/;
        }
    }
}

# Final hand-over (skipped if phase 2 failed): complete the block jobs, stop
# the remote NBD server, resume the target VM, close the tunnel, stop the
# local VM, deactivate its volumes and unlock the target VM.
# Dies if the storage migration cannot be completed.
sub phase3_cleanup {
    my ($self, $vmid, $err) = @_;

    my $targetvmid = $self->{opts}->{targetvmid} ? $self->{opts}->{targetvmid} : $vmid;

    my $conf = $self->{vmconf};
    return if $self->{phase2errors};

    my $tunnel = $self->{tunnel};

    if ($self->{storage_migration}) {
        # finish block-job
        eval { PVE::QemuServer::qemu_drive_mirror_monitor($vmid, undef, $self->{storage_migration_jobs}); };

        if (my $err = $@) {
            eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $self->{storage_migration_jobs}) };
            # use this package's own implementation; PVE::QemuMigrate is a
            # different module that is not loaded here
            eval { $self->cleanup_remotedisks() };
            die "Failed to completed storage migration\n";
        }
    }

    if ($self->{livemigration}) {
        if ($self->{storage_migration}) {
            # stop nbd server on remote vm - requirement for resume since 2.9
            my $cmd = [@{$self->{rem_ssh}}, 'qm', 'nbdstop', $targetvmid];

            eval{ PVE::Tools::run_command($cmd, outfunc => sub {}, errfunc => sub {}) };
            if (my $err = $@) {
                $self->log('err', $err);
                $self->{errors} = 1;
            }
        }

        # config moved and nbd server stopped - now we can resume vm on target
        if ($tunnel && $tunnel->{version} && $tunnel->{version} >= 1) {
            eval {
                $self->write_tunnel($tunnel, 30, "resume $targetvmid");
            };
            if (my $err = $@) {
                $self->log('err', $err);
                $self->{errors} = 1;
            }
        } else {
            my $cmd = [@{$self->{rem_ssh}}, 'qm', 'resume', $targetvmid, '--skiplock', '--nocheck'];
            my $logf = sub {
                my $line = shift;
                $self->log('err', $line);
            };
            eval { PVE::Tools::run_command($cmd, outfunc => sub {}, errfunc => $logf); };
            if (my $err = $@) {
                $self->log('err', $err);
                $self->{errors} = 1;
            }
        }

        if ($self->{storage_migration} && PVE::QemuServer::parse_guest_agent($conf)->{fstrim_cloned_disks} && $self->{running}) {
            # runs on the remote side, so it must address the target VM id
            my $cmd = [@{$self->{rem_ssh}}, 'qm', 'guest', 'cmd', $targetvmid, 'fstrim'];
            eval{ PVE::Tools::run_command($cmd, outfunc => sub {}, errfunc => sub {}) };
        }
    }

    # close tunnel on successful migration, on error phase2_cleanup closed it
    if ($tunnel) {
        eval { finish_tunnel($self, $tunnel); };
        if (my $err = $@) {
            $self->log('err', $err);
            $self->{errors} = 1;
        }
    }

    # best effort: errors while waiting for spice are deliberately ignored
    eval {
        my $timer = 0;
        if (PVE::QemuServer::vga_conf_has_spice($conf->{vga}) && $self->{running}) {
            $self->log('info', "Waiting for spice server migration");
            while (1) {
                my $res = PVE::QemuServer::vm_mon_cmd_nocheck($vmid, 'query-spice');
                last if int($res->{'migrated'}) == 1;
                last if $timer > 50;
                $timer ++;
                usleep(200000);
            }
        }
    };

    # always stop local VM
    eval { PVE::QemuServer::vm_stop($self->{storecfg}, $vmid, 1, 1); };
    if (my $err = $@) {
        $self->log('err', "stopping vm failed - $err");
        $self->{errors} = 1;
    }

    # always deactivate volumes - avoid lvm LVs to be active on several nodes
    eval {
        my $vollist = PVE::QemuServer::get_vm_volumes($conf);
        PVE::Storage::deactivate_volumes($self->{storecfg}, $vollist);
    };
    if (my $err = $@) {
        $self->log('err', $err);
        $self->{errors} = 1;
    }

    # clear migrate lock
    my $cmd = [ @{$self->{rem_ssh}}, 'qm', 'unlock', $targetvmid ];
    $self->cmd_logerr($cmd, errmsg => "failed to clear migrate lock");
}

# Last hook of the migration; intentionally empty.
sub final_cleanup {
    my ($self, $vmid) = @_;

    # nothing to do
}

# Return the smallest power of two that is >= the given value
# (1 for anything below 2). Plain function, not a method.
# Uses integer doubling rather than log()/log(2), so the result does not
# depend on floating-point rounding of the logarithm quotient.
sub round_powerof2 {
    my ($value) = @_;

    return 1 if $value < 2;

    my $power = 2;
    $power *= 2 while $power < $value;

    return $power;
}

1;