Github user revans2 commented on a diff in the pull request: https://github.com/apache/zookeeper/pull/453#discussion_r168807853 --- Diff: src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerMainTest.java --- @@ -888,4 +888,127 @@ public void testWithOnlyMinSessionTimeout() throws Exception { maxSessionTimeOut, quorumPeer.getMaxSessionTimeout()); } + @Test + public void testTxnAheadSnapInRetainDB() throws Exception { + // 1. start up server and wait for leader election to finish + ClientBase.setupTestEnv(); + final int SERVER_COUNT = 3; + final int clientPorts[] = new int[SERVER_COUNT]; + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < SERVER_COUNT; i++) { + clientPorts[i] = PortAssignment.unique(); + sb.append("server." + i + "=127.0.0.1:" + PortAssignment.unique() + ":" + PortAssignment.unique() + ";" + clientPorts[i] + "\n"); + } + String quorumCfgSection = sb.toString(); + + MainThread mt[] = new MainThread[SERVER_COUNT]; + ZooKeeper zk[] = new ZooKeeper[SERVER_COUNT]; + for (int i = 0; i < SERVER_COUNT; i++) { + mt[i] = new MainThread(i, clientPorts[i], quorumCfgSection); + mt[i].start(); + zk[i] = new ZooKeeper("127.0.0.1:" + clientPorts[i], ClientBase.CONNECTION_TIMEOUT, this); + } + + waitForAll(zk, States.CONNECTED); + + // we need to shutdown and start back up to make sure that the create session isn't the first transaction since + // that is rather innocuous. + for (int i = 0; i < SERVER_COUNT; i++) { + mt[i].shutdown(); + } + + waitForAll(zk, States.CONNECTING); + + for (int i = 0; i < SERVER_COUNT; i++) { + mt[i].start(); + // Recreate a client session since the previous session was not persisted. + zk[i] = new ZooKeeper("127.0.0.1:" + clientPorts[i], ClientBase.CONNECTION_TIMEOUT, this); + } + + waitForAll(zk, States.CONNECTED); + + // 2. kill all followers + int leader = -1; + Map<Long, Proposal> outstanding = null; + for (int i = 0; i < SERVER_COUNT; i++) { + if (mt[i].main.quorumPeer.leader != null) { + leader = i; + outstanding = mt[leader].main.quorumPeer.leader.outstandingProposals; + // increase the tick time to delay the leader going to looking + mt[leader].main.quorumPeer.tickTime = 10000; + } + } + LOG.warn("LEADER {}", leader); + + for (int i = 0; i < SERVER_COUNT; i++) { + if (i != leader) { + mt[i].shutdown(); + } + } + + // 3. start up the followers to form a new quorum + for (int i = 0; i < SERVER_COUNT; i++) { + if (i != leader) { + mt[i].start(); + } + } + + // 4. wait one of the follower to be the leader + for (int i = 0; i < SERVER_COUNT; i++) { + if (i != leader) { + // Recreate a client session since the previous session was not persisted. + zk[i] = new ZooKeeper("127.0.0.1:" + clientPorts[i], ClientBase.CONNECTION_TIMEOUT, this); + waitForOne(zk[i], States.CONNECTED); + } + } + + // 5. send a create request to leader and make sure it's synced to disk, + // which means it acked from itself + try { + zk[leader].create("/zk" + leader, "zk".getBytes(), Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + Assert.fail("create /zk" + leader + " should have failed"); + } catch (KeeperException e) { + } + + // just make sure that we actually did get it in process at the + // leader + Assert.assertTrue(outstanding.size() == 1); + Proposal p = (Proposal) outstanding.values().iterator().next(); + Assert.assertTrue(p.request.getHdr().getType() == OpCode.create); + + // make sure it has a chance to write it to disk + Thread.sleep(1000); --- End diff -- I was able to do what you said and drop the 1 second sleep, but the sleep at step 6 I am going to need something else because the leader is only in the looking state for 2 ms. Leader election happens way too fast for us to be able to catch that by polling. If I remove the 4 second sleep it does not trigger the error case, I don't completely know why. I'll spend some time looking at it, but if you have any suggestions I would appreciate it.
---