Author: breed
Date: Thu Jul 28 05:49:59 2011
New Revision: 1151738
URL: http://svn.apache.org/viewvc?rev=1151738&view=rev
Log:
ZOOKEEPER-1090. Race condition while taking snapshot can lead to not restoring
data tree correctly.
Modified:
zookeeper/trunk/CHANGES.txt
zookeeper/trunk/src/java/main/org/apache/zookeeper/server/DataTree.java
zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LoadFromLogTest.java
Modified: zookeeper/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/zookeeper/trunk/CHANGES.txt?rev=1151738&r1=1151737&r2=1151738&view=diff
==============================================================================
--- zookeeper/trunk/CHANGES.txt (original)
+++ zookeeper/trunk/CHANGES.txt Thu Jul 28 05:49:59 2011
@@ -266,6 +266,8 @@ BUGFIXES:
ZOOKEEPER-1119. zkServer stop command incorrectly reading comment lines in
zoo.cfg (phunt via mahadev)
+ ZOOKEEPER-1090. Race condition while taking snapshot can lead to not
restoring data tree correctly (Vishal K via breed)
+
IMPROVEMENTS:
ZOOKEEPER-724. Improve junit test integration - log harness information
(phunt via mahadev)
Modified:
zookeeper/trunk/src/java/main/org/apache/zookeeper/server/DataTree.java
URL:
http://svn.apache.org/viewvc/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/DataTree.java?rev=1151738&r1=1151737&r2=1151738&view=diff
==============================================================================
--- zookeeper/trunk/src/java/main/org/apache/zookeeper/server/DataTree.java
(original)
+++ zookeeper/trunk/src/java/main/org/apache/zookeeper/server/DataTree.java Thu
Jul 28 05:49:59 2011
@@ -791,9 +791,6 @@ public class DataTree {
rc.type = header.getType();
rc.err = 0;
rc.multiResult = null;
- if (rc.zxid > lastProcessedZxid) {
- lastProcessedZxid = rc.zxid;
- }
switch (header.getType()) {
case OpCode.create:
CreateTxn createTxn = (CreateTxn) txn;
@@ -914,6 +911,23 @@ public class DataTree {
} catch (IOException e) {
LOG.warn("Failed:" + debug, e);
}
+ /*
+ * A snapshot might be in progress while we are modifying the data
+ * tree. If we set lastProcessedZxid prior to making corresponding
+ * change to the tree, then the zxid associated with the snapshot
+ * file will be ahead of its contents. Thus, while restoring from
+ * the snapshot, the restore method will not apply the transaction
+ * for zxid associated with the snapshot file, since the restore
+ * method assumes that transaction to be present in the snapshot.
+ *
+ * To avoid this, we first apply the transaction and then modify
+ * lastProcessedZxid. During restore, we correctly handle the
+ * case where the snapshot contains data ahead of the zxid associated
+ * with the file.
+ */
+ if (rc.zxid > lastProcessedZxid) {
+ lastProcessedZxid = rc.zxid;
+ }
return rc;
}
Modified:
zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LoadFromLogTest.java
URL:
http://svn.apache.org/viewvc/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LoadFromLogTest.java?rev=1151738&r1=1151737&r2=1151738&view=diff
==============================================================================
---
zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LoadFromLogTest.java
(original)
+++
zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LoadFromLogTest.java
Thu Jul 28 05:49:59 2011
@@ -43,10 +43,10 @@ import org.apache.zookeeper.server.DataT
import org.apache.zookeeper.server.DataNode;
import org.apache.zookeeper.txn.CreateTxn;
import org.apache.zookeeper.txn.DeleteTxn;
-import org.apache.zookeeper.txn.TxnHeader;
import org.apache.zookeeper.ZooDefs.OpCode;
import org.apache.jute.Record;
import java.io.FileInputStream;
+
import org.apache.jute.BinaryInputArchive;
import org.apache.zookeeper.server.persistence.FileHeader;
@@ -59,6 +59,7 @@ public class LoadFromLogTest extends ZKT
// setting up the quorum has a transaction overhead for creating and
closing the session
private static final int TRANSACTION_OVERHEAD = 2;
private static final int TOTAL_TRANSACTIONS = NUM_MESSAGES +
TRANSACTION_OVERHEAD;
+ private volatile boolean connected;
/**
* test that all transactions from the Log are loaded, and only once
@@ -114,7 +115,22 @@ public class LoadFromLogTest extends ZKT
public void process(WatchedEvent event) {
- // do nothing
+ switch (event.getType()) {
+ case None:
+ switch (event.getState()) {
+ case SyncConnected:
+ connected = true;
+ break;
+ case Disconnected:
+ connected = false;
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
}
/**
@@ -215,4 +231,111 @@ public class LoadFromLogTest extends ZKT
Assert.assertTrue("Missing magic number ",
header.getMagic() == FileTxnLog.TXNLOG_MAGIC);
}
-}
+
+ /**
+ * Test we can restore the snapshot that has data ahead of the zxid
+ * of the snapshot file.
+ */
+ @Test
+ public void testRestore() throws Exception {
+ // setup a single server cluster
+ File tmpDir = ClientBase.createTmpDir();
+ ClientBase.setupTestEnv();
+ ZooKeeperServer zks = new ZooKeeperServer(tmpDir, tmpDir, 3000);
+ SyncRequestProcessor.setSnapCount(10000);
+ final int PORT = Integer.parseInt(HOSTPORT.split(":")[1]);
+ ServerCnxnFactory f = ServerCnxnFactory.createFactory(PORT, -1);
+ f.startup(zks);
+ Assert.assertTrue("waiting for server being up ", ClientBase
+ .waitForServerUp(HOSTPORT, CONNECTION_TIMEOUT));
+ ZooKeeper zk = new ZooKeeper(HOSTPORT, CONNECTION_TIMEOUT,
this);
+
+ long start = System.currentTimeMillis();
+ while (!connected) {
+ long end = System.currentTimeMillis();
+ if (end - start > 5000) {
+ Assert.assertTrue("Could not connect with
server in 5 seconds",
+ false);
+ }
+ try {
+ Thread.sleep(200);
+ } catch (Exception e) {
+ LOG.warn("Intrrupted");
+ }
+
+ }
+ // generate some transactions
+ String lastPath = null;
+ try {
+ zk.create("/invalidsnap", new byte[0],
Ids.OPEN_ACL_UNSAFE,
+ CreateMode.PERSISTENT);
+ for (int i = 0; i < NUM_MESSAGES; i++) {
+ lastPath = zk.create("/invalidsnap/test-", new
byte[0],
+ Ids.OPEN_ACL_UNSAFE,
CreateMode.PERSISTENT_SEQUENTIAL);
+ }
+ } finally {
+ zk.close();
+ }
+ String[] tokens = lastPath.split("-");
+ String expectedPath = "/invalidsnap/test-"
+ + String.format("%010d",
+ (new
Integer(tokens[1])).intValue() + 1);
+ long eZxid = zks.getZKDatabase().getDataTreeLastProcessedZxid();
+ // force the zxid to be behind the content
+ zks.getZKDatabase().setlastProcessedZxid(
+
zks.getZKDatabase().getDataTreeLastProcessedZxid() - 10);
+ LOG.info("Set lastProcessedZxid to "
+ +
zks.getZKDatabase().getDataTreeLastProcessedZxid());
+ // Force snapshot and restore
+ zks.takeSnapshot();
+ zks.shutdown();
+ f.shutdown();
+
+ zks = new ZooKeeperServer(tmpDir, tmpDir, 3000);
+ SyncRequestProcessor.setSnapCount(10000);
+ f = ServerCnxnFactory.createFactory(PORT, -1);
+ f.startup(zks);
+ Assert.assertTrue("waiting for server being up ", ClientBase
+ .waitForServerUp(HOSTPORT, CONNECTION_TIMEOUT));
+ connected = false;
+ long fZxid = zks.getZKDatabase().getDataTreeLastProcessedZxid();
+
+ // Verify lastProcessedZxid is set correctly
+ Assert.assertTrue("Restore failed expected zxid=" + eZxid + "
found="
+ + fZxid, fZxid == eZxid);
+ zk = new ZooKeeper(HOSTPORT, CONNECTION_TIMEOUT, this);
+ start = System.currentTimeMillis();
+ while (!connected) {
+ long end = System.currentTimeMillis();
+ if (end - start > 5000) {
+ Assert.assertTrue("Could not connect with
server in 5 seconds",
+ false);
+ }
+ try {
+ Thread.sleep(200);
+ } catch (Exception e) {
+ LOG.warn("Intrrupted");
+ }
+
+ }
+ // Verify correctness of data and whether sequential znode
creation
+ // proceeds correctly after this point
+ String[] children;
+ String path;
+ try {
+ children = zk.getChildren("/invalidsnap",
false).toArray(
+ new String[0]);
+ path = zk.create("/invalidsnap/test-", new byte[0],
+ Ids.OPEN_ACL_UNSAFE,
CreateMode.PERSISTENT_SEQUENTIAL);
+ } finally {
+ zk.close();
+ }
+ LOG.info("Expected " + expectedPath + " found " + path);
+ Assert.assertTrue("Error in sequential znode creation expected "
+ + expectedPath + " found " + path,
path.equals(expectedPath));
+ Assert.assertTrue("Unexpected number of children " +
children.length
+ + " expected " + NUM_MESSAGES,
+ (children.length == NUM_MESSAGES));
+ f.shutdown();
+ }
+}
\ No newline at end of file