Author: fpj Date: Sat Oct 19 10:05:30 2013 New Revision: 1533725 URL: http://svn.apache.org/r1533725 Log: ZOOKEEPER-1558. Leader should not snapshot uncommitted state (fpj)
Modified: zookeeper/branches/branch-3.4/CHANGES.txt zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/SyncRequestProcessor.java zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/ZooKeeperServer.java zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/quorum/Leader.java zookeeper/branches/branch-3.4/src/java/test/org/apache/zookeeper/server/quorum/Zab1_0Test.java Modified: zookeeper/branches/branch-3.4/CHANGES.txt URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.4/CHANGES.txt?rev=1533725&r1=1533724&r2=1533725&view=diff ============================================================================== --- zookeeper/branches/branch-3.4/CHANGES.txt (original) +++ zookeeper/branches/branch-3.4/CHANGES.txt Sat Oct 19 10:05:30 2013 @@ -135,6 +135,8 @@ BUGFIXES: ZOOKEEPER-1646. mt c client tests fail on Ubuntu Raring (phunt) + ZOOKEEPER-1558. Leader should not snapshot uncommitted state (fpj) + IMPROVEMENTS: ZOOKEEPER-1564. Allow JUnit test build with IBM Java Modified: zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/SyncRequestProcessor.java URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/SyncRequestProcessor.java?rev=1533725&r1=1533724&r2=1533725&view=diff ============================================================================== --- zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/SyncRequestProcessor.java (original) +++ zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/SyncRequestProcessor.java Sat Oct 19 10:05:30 2013 @@ -65,6 +65,12 @@ public class SyncRequestProcessor extend * The number of log entries to log before starting a snapshot */ private static int snapCount = ZooKeeperServer.getSnapCount(); + + /** + * The number of log entries before rolling the log, number + * is chosen randomly + */ + private static int randRoll; private final Request requestOfDeath = Request.requestOfDeath; @@ -76,7 +82,7 @@ public class SyncRequestProcessor extend this.nextProcessor = nextProcessor; running = true; } - + /** * used by tests to check for changing * snapcounts @@ -84,6 +90,7 @@ public class SyncRequestProcessor extend */ public static void setSnapCount(int count) { snapCount = count; + randRoll = count; } /** @@ -93,6 +100,18 @@ public class SyncRequestProcessor extend public static int getSnapCount() { return snapCount; } + + /** + * Sets the value of randRoll. This method + * is here to avoid a findbugs warning for + * setting a static variable in an instance + * method. + * + * @param roll + */ + private static void setRandRoll(int roll) { + randRoll = roll; + } @Override public void run() { @@ -101,7 +120,7 @@ public class SyncRequestProcessor extend // we do this in an attempt to ensure that not all of the servers // in the ensemble take a snapshot at the same time - int randRoll = r.nextInt(snapCount/2); + setRandRoll(r.nextInt(snapCount/2)); while (true) { Request si = null; if (toFlush.isEmpty()) { Modified: zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/ZooKeeperServer.java URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/ZooKeeperServer.java?rev=1533725&r1=1533724&r2=1533725&view=diff ============================================================================== --- zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/ZooKeeperServer.java (original) +++ zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/ZooKeeperServer.java Sat Oct 19 10:05:30 2013 @@ -284,12 +284,10 @@ public class ZooKeeperServer implements // XXX: Is lastProcessedZxid really the best thing to use? killSession(session, zkDb.getDataTreeLastProcessedZxid()); } - - // Make a clean snapshot - takeSnapshot(); } public void takeSnapshot(){ + try { txnLogFactory.save(zkDb.getDataTree(), zkDb.getSessionWithTimeOuts()); } catch (IOException e) { Modified: zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/quorum/Leader.java URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/quorum/Leader.java?rev=1533725&r1=1533724&r2=1533725&view=diff ============================================================================== --- zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/quorum/Leader.java (original) +++ zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/quorum/Leader.java Sat Oct 19 10:05:30 2013 @@ -434,7 +434,7 @@ public class Leader { long zxid = Long.parseLong(initialZxid); zk.setZxid((zk.getZxid() & 0xffffffff00000000L) | zxid); } - + if (!System.getProperty("zookeeper.leaderServes", "yes").equals("no")) { self.cnxnFactory.setZooKeeperServer(zk); } Modified: zookeeper/branches/branch-3.4/src/java/test/org/apache/zookeeper/server/quorum/Zab1_0Test.java URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.4/src/java/test/org/apache/zookeeper/server/quorum/Zab1_0Test.java?rev=1533725&r1=1533724&r2=1533725&view=diff ============================================================================== --- zookeeper/branches/branch-3.4/src/java/test/org/apache/zookeeper/server/quorum/Zab1_0Test.java (original) +++ zookeeper/branches/branch-3.4/src/java/test/org/apache/zookeeper/server/quorum/Zab1_0Test.java Sat Oct 19 10:05:30 2013 @@ -33,23 +33,30 @@ import java.net.InetSocketAddress; import java.net.ServerSocket; import java.net.Socket; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import org.apache.jute.BinaryInputArchive; import org.apache.jute.BinaryOutputArchive; import org.apache.jute.InputArchive; import org.apache.jute.OutputArchive; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.PortAssignment; import org.apache.zookeeper.WatchedEvent; import org.apache.zookeeper.Watcher; import org.apache.zookeeper.Watcher.Event.EventType; import org.apache.zookeeper.ZooDefs; +import org.apache.zookeeper.ZooDefs.Ids; +import org.apache.zookeeper.ZooKeeper.States; import org.apache.zookeeper.data.Stat; import org.apache.zookeeper.server.ByteBufferInputStream; import org.apache.zookeeper.server.ByteBufferOutputStream; import org.apache.zookeeper.server.Request; import org.apache.zookeeper.server.ServerCnxn; import org.apache.zookeeper.server.ServerCnxnFactory; +import org.apache.zookeeper.server.SyncRequestProcessor; import org.apache.zookeeper.server.ZKDatabase; import org.apache.zookeeper.server.ZooKeeperServer; import org.apache.zookeeper.server.ZooKeeperServer.DataTreeBuilder; @@ -1201,6 +1208,84 @@ public class Zab1_0Test { }); } + /** + * verify that a peer with dirty snapshot joining an established cluster + * does not go into an inconsistent state. + * + * {@link https://issues.apache.org/jira/browse/ZOOKEEPER-1558} + */ + @Test + public void testDirtySnapshot() + throws IOException, + InterruptedException, + KeeperException, + NoSuchFieldException, + IllegalAccessException { + Socket pair[] = getSocketPair(); + Socket leaderSocket = pair[0]; + Socket followerSocket = pair[1]; + File tmpDir = File.createTempFile("test", "dir"); + tmpDir.delete(); + tmpDir.mkdir(); + LeadThread leadThread = null; + Leader leader = null; + try { + // Setup a database with two znodes + FileTxnSnapLog snapLog = new FileTxnSnapLog(tmpDir, tmpDir); + ZKDatabase zkDb = new ZKDatabase(snapLog); + + long zxid = ZxidUtils.makeZxid(0, 1); + String path = "/foo"; + zkDb.processTxn(new TxnHeader(13,1000,zxid,30,ZooDefs.OpCode.create), + new CreateTxn(path, "fpjwasalsohere".getBytes(), ZooDefs.Ids.OPEN_ACL_UNSAFE, false, 1)); + Stat stat = new Stat(); + Assert.assertEquals("fpjwasalsohere", new String(zkDb.getData(path, stat, null))); + + // Close files + snapLog.close(); + + QuorumPeer peer = createQuorumPeer(tmpDir); + + leader = createLeader(tmpDir, peer); + peer.leader = leader; + + // Set the last accepted epoch and current epochs to be 1 + peer.setAcceptedEpoch(0); + peer.setCurrentEpoch(0); + + leadThread = new LeadThread(leader); + leadThread.start(); + + while(leader.cnxAcceptor == null || !leader.cnxAcceptor.isAlive()) { + Thread.sleep(20); + } + + leader.shutdown("Shutting down the leader"); + + // Check if there is a valid snapshot (we better not have it) + File snapDir = new File (tmpDir, FileTxnSnapLog.version + FileTxnSnapLog.VERSION); + List<File> files = Util.sortDataDir(snapDir.listFiles(),"snapshot", false); + + for (File f : files) { + try { + Assert.assertFalse("Found a valid snapshot", Util.isValidSnapshot(f)); + } catch (IOException e) { + LOG.info("invalid snapshot " + f, e); + } + } + + } finally { + if (leader != null) { + leader.shutdown("end of test"); + } + if (leadThread != null) { + leadThread.interrupt(); + leadThread.join(); + } + recursiveDelete(tmpDir); + } + } + private void recursiveDelete(File file) { if (file.isFile()) { file.delete();