This is an automated email from the ASF dual-hosted git repository.
ddiederen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/zookeeper.git
The following commit(s) were added to refs/heads/master by this push:
new 1214d3b ZOOKEEPER-3781: Create snapshots on followers when
snapshot.trust.empty is true
1214d3b is described below
commit 1214d3bf611d153ae8c3987523da01d3d6c82686
Author: Stig Rohde Døssing <[email protected]>
AuthorDate: Sat Mar 6 19:38:43 2021 +0000
ZOOKEEPER-3781: Create snapshots on followers when snapshot.trust.empty is
true
snapshot.trust.empty is an escape hatch for users upgrading from 3.4.x to
later Zookeeper
versions, allowing nodes to start with a non-empty transaction log but no
snapshot.
The intent is for this setting to be enabled for a short while during the
upgrade,
and then disabled again, as the check it disables is a safety feature.
Prior to this PR, a node would only write a snapshot locally if it became
leader,
or if it had fallen so far behind the leader that the leader sent a SNAP
message instead
of a DIFF. This made the upgrade process inconvenient, as not all nodes
would create
a snapshot when snapshot.trust.empty was true, meaning that the safety
check could
not be flipped back on.
This PR makes follower nodes write a local snapshot when they receive
NEWLEADER,
if they have no local snapshot and snapshot.trust.empty is true.
Author: Stig Rohde Døssing <[email protected]>
Reviewers: Enrico Olivelli <[email protected]>, Damien Diederen
<[email protected]>
Closes #1581 from srdo/zookeeper-3781
---
.../apache/zookeeper/server/ZooKeeperServer.java | 4 ++
.../server/persistence/FileTxnSnapLog.java | 9 ++++
.../apache/zookeeper/server/quorum/Learner.java | 8 +++-
.../test/EmptiedSnapshotRecoveryTest.java | 50 ++++++++++++++++++++++
.../java/org/apache/zookeeper/test/QuorumUtil.java | 12 +++---
5 files changed, 76 insertions(+), 7 deletions(-)
diff --git
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServer.java
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServer.java
index 8b3e492..f0e3c82 100644
---
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServer.java
+++
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServer.java
@@ -544,6 +544,10 @@ public class ZooKeeperServer implements SessionExpirer,
ServerStats.Provider {
ServerMetrics.getMetrics().SNAPSHOT_TIME.add(elapsed);
}
+ public boolean shouldForceWriteInitialSnapshotAfterLeaderElection() {
+ return
txnLogFactory.shouldForceWriteInitialSnapshotAfterLeaderElection();
+ }
+
@Override
public long getDataDirSize() {
if (zkDb == null) {
diff --git
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
index f054bc8..a4670ec 100644
---
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
+++
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
@@ -230,6 +230,15 @@ public class FileTxnSnapLog {
}
/**
+ * whether to force the write of an initial snapshot after a leader
election,
+ * to address ZOOKEEPER-3781 after upgrading from Zookeeper 3.4.x.
+ * @return true if an initial snapshot should be written even if not
otherwise required, false otherwise.
+ */
+ public boolean shouldForceWriteInitialSnapshotAfterLeaderElection() {
+ return trustEmptySnapshot && getLastSnapshotInfo() == null;
+ }
+
+ /**
* this function restores the server
* database after reading from the
* snapshots and transaction logs
diff --git
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Learner.java
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Learner.java
index ef36b2f..8f3d00c 100644
---
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Learner.java
+++
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Learner.java
@@ -561,7 +561,13 @@ public class Learner {
if (qp.getType() == Leader.DIFF) {
LOG.info("Getting a diff from the leader 0x{}",
Long.toHexString(qp.getZxid()));
self.setSyncMode(QuorumPeer.SyncMode.DIFF);
- snapshotNeeded = false;
+ if (zk.shouldForceWriteInitialSnapshotAfterLeaderElection()) {
+ LOG.info("Forcing a snapshot write as part of upgrading
from an older Zookeeper. This should only happen while upgrading.");
+ snapshotNeeded = true;
+ syncSnapshot = true;
+ } else {
+ snapshotNeeded = false;
+ }
} else if (qp.getType() == Leader.SNAP) {
self.setSyncMode(QuorumPeer.SyncMode.SNAP);
LOG.info("Getting a snapshot from leader 0x{}",
Long.toHexString(qp.getZxid()));
diff --git
a/zookeeper-server/src/test/java/org/apache/zookeeper/test/EmptiedSnapshotRecoveryTest.java
b/zookeeper-server/src/test/java/org/apache/zookeeper/test/EmptiedSnapshotRecoveryTest.java
index 316d7bf..b9020cc 100644
---
a/zookeeper-server/src/test/java/org/apache/zookeeper/test/EmptiedSnapshotRecoveryTest.java
+++
b/zookeeper-server/src/test/java/org/apache/zookeeper/test/EmptiedSnapshotRecoveryTest.java
@@ -19,11 +19,13 @@
package org.apache.zookeeper.test;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
+import java.nio.file.Files;
import java.util.List;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.PortAssignment;
@@ -143,6 +145,54 @@ public class EmptiedSnapshotRecoveryTest extends
ZKTestCase implements Watcher {
runTest(false, true);
}
+ @Test
+ public void testRestoreWithTrustedEmptySnapFilesWhenFollowing() throws
Exception {
+ QuorumUtil qu = new QuorumUtil(1);
+ try {
+ qu.startAll();
+ String connString = qu.getConnectionStringForServer(1);
+ try (ZooKeeper zk = new ZooKeeper(connString, CONNECTION_TIMEOUT,
this)) {
+ for (int i = 0; i < N_TRANSACTIONS; i++) {
+ zk.create("/node-" + i, new byte[0], Ids.OPEN_ACL_UNSAFE,
CreateMode.PERSISTENT);
+ }
+ }
+ int leaderIndex = qu.getLeaderServer();
+ //Shut down the cluster and delete the snapshots from the followers
+ for (int i = 1; i <= qu.ALL; i++) {
+ qu.shutdown(i);
+ if (i != leaderIndex) {
+ FileTxnSnapLog txnLogFactory =
qu.getPeer(i).peer.getTxnFactory();
+ List<File> snapshots =
txnLogFactory.findNRecentSnapshots(10);
+ assertTrue(snapshots.size() > 0, "We have a snapshot to
corrupt");
+ for (File file : snapshots) {
+ Files.delete(file.toPath());
+ }
+
assertEquals(txnLogFactory.findNRecentSnapshots(10).size(), 0);
+ }
+ }
+ //Start while trusting empty snapshots, verify that the followers
save snapshots
+ System.setProperty(FileTxnSnapLog.ZOOKEEPER_SNAPSHOT_TRUST_EMPTY,
"true");
+ qu.start(leaderIndex);
+ for (int i = 1; i <= qu.ALL; i++) {
+ if (i != leaderIndex) {
+ qu.restart(i);
+ FileTxnSnapLog txnLogFactory =
qu.getPeer(i).peer.getTxnFactory();
+ List<File> snapshots =
txnLogFactory.findNRecentSnapshots(10);
+ assertTrue(snapshots.size() > 0, "A snapshot should have
been created on follower " + i);
+ }
+ }
+ //Check that the created nodes are still there
+ try (ZooKeeper zk = new ZooKeeper(connString, CONNECTION_TIMEOUT,
this)) {
+ for (int i = 0; i < N_TRANSACTIONS; i++) {
+ assertNotNull(zk.exists("/node-" + i, false));
+ }
+ }
+ } finally {
+
System.clearProperty(FileTxnSnapLog.ZOOKEEPER_SNAPSHOT_TRUST_EMPTY);
+ qu.tearDown();
+ }
+ }
+
public void process(WatchedEvent event) {
// do nothing
}
diff --git
a/zookeeper-server/src/test/java/org/apache/zookeeper/test/QuorumUtil.java
b/zookeeper-server/src/test/java/org/apache/zookeeper/test/QuorumUtil.java
index 98153b9..ce1cd1b 100644
--- a/zookeeper-server/src/test/java/org/apache/zookeeper/test/QuorumUtil.java
+++ b/zookeeper-server/src/test/java/org/apache/zookeeper/test/QuorumUtil.java
@@ -251,22 +251,22 @@ public class QuorumUtil {
public void shutdown(int id) {
QuorumPeer qp = getPeer(id).peer;
try {
- LOG.info("Shutting down quorum peer {}", qp.getName());
+ LOG.info("Shutting down quorum peer {} with id {}", qp.getName(),
id);
qp.shutdown();
Election e = qp.getElectionAlg();
if (e != null) {
- LOG.info("Shutting down leader election {}", qp.getName());
+ LOG.info("Shutting down leader election {} with id {}",
qp.getName(), id);
e.shutdown();
} else {
- LOG.info("No election available to shutdown {}", qp.getName());
+ LOG.info("No election available to shutdown {} with id {}",
qp.getName(), id);
}
- LOG.info("Waiting for {} to exit thread", qp.getName());
+ LOG.info("Waiting for {} with id {} to exit thread", qp.getName(),
id);
qp.join(30000);
if (qp.isAlive()) {
- fail("QP failed to shutdown in 30 seconds: " + qp.getName());
+ fail("QP failed to shutdown in 30 seconds: " + qp.getName() +
" " + id);
}
} catch (InterruptedException e) {
- LOG.debug("QP interrupted: {}", qp.getName(), e);
+ LOG.debug("QP interrupted: {} {}", qp.getName(), id, e);
}
}