This is an automated email from the ASF dual-hosted git repository. symat pushed a commit to branch branch-3.5 in repository https://gitbox.apache.org/repos/asf/zookeeper.git
commit a5f0eb0e5303426f9d7e3b0062e8d396ac9d98e5 Author: Mohammad Arshad <[email protected]> AuthorDate: Thu Apr 1 02:54:37 2021 +0530 ZOOKEEPER-4269: acceptedEpoch.tmp rename failure will cause server startup error Using accepted epoch from acceptedEpoch.tmp if it is available Author: Mohammad Arshad <[email protected]> Reviewers: Enrico Olivelli <[email protected]>,Damien Diederen <[email protected]> Closes #1668 from arshadmohammad/ZOOKEEPER-4269-branch-3.6 (cherry picked from commit 54e563bfe13508fc3707d45e47d37e0c201f19ed) --- .../zookeeper/common/AtomicFileOutputStream.java | 3 +- .../apache/zookeeper/server/quorum/QuorumPeer.java | 14 ++- .../quorum/CurrentEpochWriteFailureTest.java | 117 +++++++++++++++++++++ 3 files changed, 132 insertions(+), 2 deletions(-) diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/common/AtomicFileOutputStream.java b/zookeeper-server/src/main/java/org/apache/zookeeper/common/AtomicFileOutputStream.java index 740ae8f67..35f3379ca 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/common/AtomicFileOutputStream.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/common/AtomicFileOutputStream.java @@ -44,7 +44,8 @@ import org.slf4j.LoggerFactory; * place. 
*/ public class AtomicFileOutputStream extends FilterOutputStream { - private static final String TMP_EXTENSION = ".tmp"; + + public static final String TMP_EXTENSION = ".tmp"; private final static Logger LOG = LoggerFactory .getLogger(AtomicFileOutputStream.class); diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java index daf605cab..54128d4ff 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java @@ -47,6 +47,7 @@ import java.util.concurrent.atomic.AtomicReference; import javax.security.sasl.SaslException; import org.apache.zookeeper.KeeperException.BadArgumentsException; +import org.apache.zookeeper.common.AtomicFileOutputStream; import org.apache.zookeeper.common.AtomicFileWritingIdiom; import org.apache.zookeeper.common.AtomicFileWritingIdiom.WriterStatement; import org.apache.zookeeper.common.QuorumX509Util; @@ -976,7 +977,18 @@ public class QuorumPeer extends ZooKeeperThread implements QuorumStats.Provider writeLongToFile(CURRENT_EPOCH_FILENAME, currentEpoch); } if (epochOfZxid > currentEpoch) { - throw new IOException("The current epoch, " + ZxidUtils.zxidToString(currentEpoch) + ", is older than the last zxid, " + lastProcessedZxid); + // currentEpoch.tmp file in snapshot directory + File currentTmp = new File(getTxnFactory().getSnapDir(), + CURRENT_EPOCH_FILENAME + AtomicFileOutputStream.TMP_EXTENSION); + if (currentTmp.exists()) { + long epochOfTmp = readLongFromFile(currentTmp.getName()); + LOG.info("{} found. 
Setting current epoch to {}.", currentTmp, epochOfTmp); + setCurrentEpoch(epochOfTmp); + } else { + throw new IOException( + "The current epoch, " + ZxidUtils.zxidToString(currentEpoch) + + ", is older than the last zxid, " + lastProcessedZxid); + } } try { acceptedEpoch = readLongFromFile(ACCEPTED_EPOCH_FILENAME); diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CurrentEpochWriteFailureTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CurrentEpochWriteFailureTest.java new file mode 100644 index 000000000..9a172eee1 --- /dev/null +++ b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CurrentEpochWriteFailureTest.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.zookeeper.server.quorum; + +import static org.apache.zookeeper.test.ClientBase.CONNECTION_TIMEOUT; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import java.io.File; +import java.io.IOException; +import org.apache.commons.io.FileUtils; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.ZooDefs.Ids; +import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.common.AtomicFileOutputStream; +import org.apache.zookeeper.test.ClientBase; +import org.junit.After; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class CurrentEpochWriteFailureTest extends QuorumPeerTestBase { + protected static final Logger LOG = LoggerFactory.getLogger(CurrentEpochWriteFailureTest.class); + private Servers servers; + private int clientPort; + + @After + public void tearDown() throws InterruptedException { + if (servers != null) { + servers.shutDownAllServers(); + } + } + + /* + * ZOOKEEPER-4269: + * The current epoch is first written to the temporary file currentEpoch.tmp, then this file is + * renamed to currentEpoch. + * Failure, either because of exception or power-off, in renaming the currentEpoch.tmp file + * will cause server startup error with message "The current epoch, x, is older than the last + * zxid y" + * To handle this scenario we should read the current epoch from this temp file as well. 
+ */ + @Test + public void testReadCurrentEpochFromAcceptedEpochTmpFile() throws Exception { + startServers(); + writeSomeData(); + + restartServers(); + writeSomeData(); + + MainThread firstServer = servers.mt[0]; + + // As the servers were started two times, the current epoch must be two + long currentEpoch = firstServer.getQuorumPeer().getCurrentEpoch(); + assertEquals(2, currentEpoch); + + // Initialize files for later use + File snapDir = firstServer.getQuorumPeer().getTxnFactory().getSnapDir(); + File currentEpochFile = new File(snapDir, QuorumPeer.CURRENT_EPOCH_FILENAME); + File currentEpochTempFile = new File(snapDir, + QuorumPeer.CURRENT_EPOCH_FILENAME + AtomicFileOutputStream.TMP_EXTENSION); + + // Shutdown servers + servers.shutDownAllServers(); + waitForAll(servers, ZooKeeper.States.CONNECTING); + + // Create the scenario where renaming currentEpoch.tmp to currentEpoch has failed. + // In this case currentEpoch file will have old epoch and currentEpoch.tmp will have the latest epoch + FileUtils.write(currentEpochFile, Long.toString(currentEpoch - 1), "UTF-8"); + FileUtils.write(currentEpochTempFile, Long.toString(currentEpoch), "UTF-8"); + + // Restart the servers, all servers should restart successfully. + servers.restartAllServersAndClients(this); + + // Check the first server where problem was injected. 
+ assertTrue("server " + firstServer.getMyid() + + " is not up as file currentEpoch.tmp rename to currentEpoch file was failed" + + " which lead current epoch inconsistent state.", ClientBase + .waitForServerUp("127.0.0.1:" + firstServer.getClientPort(), CONNECTION_TIMEOUT)); + } + + private void restartServers() throws InterruptedException, IOException { + servers.shutDownAllServers(); + waitForAll(servers, ZooKeeper.States.CONNECTING); + servers.restartAllServersAndClients(this); + waitForAll(servers, ZooKeeper.States.CONNECTED); + } + + private void writeSomeData() throws Exception { + ZooKeeper client = ClientBase.createZKClient("127.0.0.1:" + clientPort); + String path = "/somePath" + System.currentTimeMillis(); + String data = "someData"; + client.create(path, data.getBytes(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); + byte[] data1 = client.getData(path, false, null); + assertEquals(data, new String(data1)); + client.close(); + } + + private void startServers() throws Exception { + servers = LaunchServers(3); + clientPort = servers.clientPorts[0]; + } +}
