Github user revans2 commented on a diff in the pull request: https://github.com/apache/zookeeper/pull/453#discussion_r169662234 --- Diff: src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerMainTest.java --- @@ -888,4 +923,103 @@ public void testWithOnlyMinSessionTimeout() throws Exception { maxSessionTimeOut, quorumPeer.getMaxSessionTimeout()); } + @Test + public void testFailedTxnAsPartOfQuorumLoss() throws Exception { + // 1. start up server and wait for leader election to finish + ClientBase.setupTestEnv(); + final int SERVER_COUNT = 3; + servers = LaunchServers(SERVER_COUNT); + + waitForAll(servers, States.CONNECTED); + + // we need to shutdown and start back up to make sure that the create session isn't the first transaction since + // that is rather innocuous. + servers.shutDownAllServers(); + waitForAll(servers, States.CONNECTING); + servers.restartAllServersAndClients(this); + waitForAll(servers, States.CONNECTED); + + // 2. kill all followers + int leader = servers.findLeader(); + Map<Long, Proposal> outstanding = servers.mt[leader].main.quorumPeer.leader.outstandingProposals; + // increase the tick time to delay the leader going to looking + servers.mt[leader].main.quorumPeer.tickTime = 10000; + LOG.warn("LEADER {}", leader); + + for (int i = 0; i < SERVER_COUNT; i++) { + if (i != leader) { + servers.mt[i].shutdown(); + } + } + + // 3. start up the followers to form a new quorum + for (int i = 0; i < SERVER_COUNT; i++) { + if (i != leader) { + servers.mt[i].start(); + } + } + + // 4. wait one of the follower to be the new leader + for (int i = 0; i < SERVER_COUNT; i++) { + if (i != leader) { + // Recreate a client session since the previous session was not persisted. + servers.restartClient(i, this); + waitForOne(servers.zk[i], States.CONNECTED); + } + } + + // 5. 
send a create request to old leader and make sure it's synced to disk, + // which means it acked from itself + try { + servers.zk[leader].create("/zk" + leader, "zk".getBytes(), Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + Assert.fail("create /zk" + leader + " should have failed"); + } catch (KeeperException e) { + } + + // just make sure that we actually did get it in process at the + // leader + Assert.assertEquals(1, outstanding.size()); + Proposal p = outstanding.values().iterator().next(); + Assert.assertEquals(OpCode.create, p.request.getHdr().getType()); + + // make sure it has a chance to write it to disk + int sleepTime = 0; + Long longLeader = new Long(leader); + while (!p.qvAcksetPairs.get(0).getAckset().contains(longLeader)) { + if (sleepTime > 2000) { + Assert.fail("Transaction not synced to disk within 1 second " + p.qvAcksetPairs.get(0).getAckset() + + " expected " + leader); + } + Thread.sleep(100); + sleepTime += 100; + } + + // 6. wait for the leader to quit due to not enough followers and come back up as a part of the new quorum + sleepTime = 0; + Follower f = servers.mt[leader].main.quorumPeer.follower; + while (f == null || !f.isRunning()) { + if (sleepTime > 10_000) { + Assert.fail("Took too long for old leader to time out " + servers.mt[leader].main.quorumPeer.getPeerState()); + } + Thread.sleep(100); + sleepTime += 100; + f = servers.mt[leader].main.quorumPeer.follower; + } + servers.mt[leader].shutdown(); --- End diff -- It takes a lot of very specific steps to make the data inconsistency show up. This step is needed to force a replay of the transaction log, which contains an edit that wasn't considered as part of leader election.
---