If last_sync is 0, the VM configuration has been stolen
(either manually or by HA recovery).
Under this condition, the replication snapshot should not be deleted.
This snapshot is used to restore replication state.
If last_sync is greater than 0 and the snapshot name does not match the
last sync's snapshot, it must be a remnant of an earlier sync and should
be deleted.
---
 PVE/Replication.pm | 40 ++++++++++++++++++++++------------------
 1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/PVE/Replication.pm b/PVE/Replication.pm
index 89a9572..bae04ef 100644
--- a/PVE/Replication.pm
+++ b/PVE/Replication.pm
@@ -135,22 +135,26 @@ sub prepare {
                (defined($parent_snapname) && ($snap eq $parent_snapname))) {
                $last_snapshots->{$volid}->{$snap} = 1;
            } elsif ($snap =~ m/^\Q$prefix\E/) {
-               $logfunc->("delete stale replication snapshot '$snap' on 
$volid");
-
-               eval {
-                   PVE::Storage::volume_snapshot_delete($storecfg, $volid, 
$snap);
-                   $cleaned_replicated_volumes->{$volid} = 1;
-               };
-
-               # If deleting the snapshot fails, we can not be sure if it was 
due to an error or a timeout.
-               # The likelihood that the delete has worked out is high at a 
timeout.
-               # If it really fails, it will try to remove on the next run.
-               if (my $err = $@) {
-                   # warn is for syslog/journal.
-                   warn $err;
-
-                   # logfunc will written in replication log.
-                   $logfunc->("delete stale replication snapshot error: $err");
+               if ($last_sync != 0) {
+                   $logfunc->("delete stale replication snapshot '$snap' on 
$volid");
+                   eval {
+                       PVE::Storage::volume_snapshot_delete($storecfg, $volid, 
$snap);
+                       $cleaned_replicated_volumes->{$volid} = 1;
+                   };
+
+                   # If deleting the snapshot fails, we can not be sure if it 
was due to an error or a timeout.
+                   # The likelihood that the delete has worked out is high at 
a timeout.
+                   # If it really fails, it will try to remove on the next run.
+                   if (my $err = $@) {
+                       # warn is for syslog/journal.
+                       warn $err;
+
+                       # logfunc will written in replication log.
+                       $logfunc->("delete stale replication snapshot error: 
$err");
+                   }           
+               # Last_sync=0 and a replication snapshot only occur, if the VM 
was stolen
+               } else {
+                   $last_snapshots->{$volid}->{$snap} = 1;
                }
            }
        }
@@ -217,11 +221,11 @@ sub replicate {
            my $store_list = [ map { (PVE::Storage::parse_volume_id($_))[0] } 
@$sorted_volids ];
 
            my $ssh_info = PVE::Cluster::get_ssh_info($jobcfg->{target});
-           remote_prepare_local_job($ssh_info, $jobid, $vmid, [], $store_list, 
0, undef, 1, $logfunc);
+           remote_prepare_local_job($ssh_info, $jobid, $vmid, [], $store_list, 
1, undef, 1, $logfunc);
 
        }
        # remove all local replication snapshots (lastsync => 0)
-       prepare($storecfg, $sorted_volids, $jobid, 0, undef, $logfunc);
+       prepare($storecfg, $sorted_volids, $jobid, 1, undef, $logfunc);
 
        PVE::ReplicationConfig::delete_job($jobid); # update config
        $logfunc->("job removed");
-- 
2.11.0


_______________________________________________
pve-devel mailing list
[email protected]
https://pve.proxmox.com/cgi-bin/mailman/listinfo/pve-devel

Reply via email to