Author: michim Date: Tue May 20 21:42:51 2014 New Revision: 1596422 URL: http://svn.apache.org/r1596422 Log: ZOOKEEPER-1699. Leader should timeout and give up leadership when losing quorum of last proposed configuration (Alexander Shraer via michim)
Modified: zookeeper/trunk/CHANGES.txt zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Leader.java zookeeper/trunk/src/java/test/org/apache/zookeeper/test/ReconfigTest.java Modified: zookeeper/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/zookeeper/trunk/CHANGES.txt?rev=1596422&r1=1596421&r2=1596422&view=diff ============================================================================== --- zookeeper/trunk/CHANGES.txt (original) +++ zookeeper/trunk/CHANGES.txt Tue May 20 21:42:51 2014 @@ -661,6 +661,9 @@ BUGFIXES: MBeans instead of use MBeanRegistry.unregisterAll() method. (César Ãlvarez Núñez via michim) + ZOOKEEPER-1699. Leader should timeout and give up leadership when losing + quorum of last proposed configuration (Alexander Shraer via michim) + IMPROVEMENTS: ZOOKEEPER-1170. Fix compiler (eclipse) warnings: unused imports, Modified: zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Leader.java URL: http://svn.apache.org/viewvc/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Leader.java?rev=1596422&r1=1596421&r2=1596422&view=diff ============================================================================== --- zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Leader.java (original) +++ zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Leader.java Tue May 20 21:42:51 2014 @@ -62,69 +62,14 @@ public class Leader { LOG.info("TCP NoDelay set to: " + nodelay); } - static public class Proposal { + static public class Proposal extends SyncedLearnerTracker { public QuorumPacket packet; - - private ArrayList<QuorumVerifierAcksetPair> qvAcksetPairs = new ArrayList<QuorumVerifierAcksetPair>(); - public Request request; @Override public String toString() { return packet.getType() + ", " + packet.getZxid() + ", " + request; } - - public void addQuorumVerifier(QuorumVerifier qv) { - qvAcksetPairs.add(new QuorumVerifierAcksetPair(qv, - new HashSet<Long>(qv.getVotingMembers().size()))); - } - - public boolean addAck(Long sid) { - boolean change = false; - for (QuorumVerifierAcksetPair qvAckset : qvAcksetPairs) { - if (qvAckset.getQuorumVerifier().getVotingMembers().containsKey(sid)) { - qvAckset.getAckset().add(sid); - change = true; - } - } - return change; - } - - public boolean hasAllQuorums() { - for (QuorumVerifierAcksetPair qvAckset : qvAcksetPairs) { - if (!qvAckset.getQuorumVerifier().containsQuorum(qvAckset.getAckset())) - return false; - } - return true; - } - - public String ackSetsToString(){ - StringBuilder sb = new StringBuilder(); - - for (QuorumVerifierAcksetPair qvAckset : qvAcksetPairs) { - sb.append(qvAckset.getAckset().toString()).append(","); - } - - return sb.substring(0, sb.length()-1); - } - - public static class QuorumVerifierAcksetPair { - private final QuorumVerifier _qv; - private final HashSet<Long> _ackset; - - public QuorumVerifierAcksetPair(QuorumVerifier qv, HashSet<Long> ackset) { - _qv = qv; - _ackset = ackset; - } - - public QuorumVerifier getQuorumVerifier() { - return _qv; - } - - public HashSet<Long> getAckset() { - return _ackset; - } - } } final LeaderZooKeeperServer zk; @@ -585,34 +530,53 @@ public class Leader { boolean tickSkip = true; while (true) { - Thread.sleep(self.tickTime / 2); - if (!tickSkip) { - self.tick++; - } - HashSet<Long> syncedSet = new HashSet<Long>(); + synchronized (this) { + long start = System.currentTimeMillis(); + long cur = start; + long end = start + self.tickTime / 2; + while (cur < end) { + wait(end - cur); + cur = System.currentTimeMillis(); + } - // lock on the followers when we use it. - syncedSet.add(self.getId()); + if (!tickSkip) { + self.tick++; + } - for (LearnerHandler f : getLearners()) { - // Synced set is used to check we have a supporting quorum, so only - // PARTICIPANT, not OBSERVER, learners should be used - if (f.synced() && self.getQuorumVerifier().getVotingMembers().containsKey(f.getSid())) { - syncedSet.add(f.getSid()); + // We use an instance of SyncedLearnerTracker to + // track synced learners to make sure we still have a + // quorum of current (and potentially next pending) view. + SyncedLearnerTracker syncedAckSet = new SyncedLearnerTracker(); + syncedAckSet.addQuorumVerifier(self.getQuorumVerifier()); + if (self.getLastSeenQuorumVerifier() != null + && self.getLastSeenQuorumVerifier().getVersion() > self + .getQuorumVerifier().getVersion()) { + syncedAckSet.addQuorumVerifier(self + .getLastSeenQuorumVerifier()); } + + syncedAckSet.addAck(self.getId()); + + for (LearnerHandler f : getLearners()) { + if (f.synced()) { + syncedAckSet.addAck(f.getSid()); + } + } + + if (!tickSkip && !syncedAckSet.hasAllQuorums()) { + // Lost quorum of last committed and/or last proposed + // config, shutdown + shutdown("Not sufficient followers synced, only synced with sids: [ " + + syncedAckSet.ackSetsToString() + " ]"); + // make sure the order is the same! + // the leader goes to looking + return; + } + tickSkip = !tickSkip; + } + for (LearnerHandler f : getLearners()) { f.ping(); } - - if (!tickSkip && !self.getQuorumVerifier().containsQuorum(syncedSet)) { - //if (!tickSkip && syncedCount < self.quorumPeers.size() / 2) { - // Lost quorum, shutdown - shutdown("Not sufficient followers synced, only synced with sids: [ " - + getSidSetString(syncedSet) + " ]"); - // make sure the order is the same! - // the leader goes to looking - return; - } - tickSkip = !tickSkip; } } finally { zk.unregisterJMX(this); Modified: zookeeper/trunk/src/java/test/org/apache/zookeeper/test/ReconfigTest.java URL: http://svn.apache.org/viewvc/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/ReconfigTest.java?rev=1596422&r1=1596421&r2=1596422&view=diff ============================================================================== --- zookeeper/trunk/src/java/test/org/apache/zookeeper/test/ReconfigTest.java (original) +++ zookeeper/trunk/src/java/test/org/apache/zookeeper/test/ReconfigTest.java Tue May 20 21:42:51 2014 @@ -33,6 +33,7 @@ import org.apache.zookeeper.ZooDefs; import org.apache.zookeeper.ZooKeeper; import org.apache.zookeeper.AsyncCallback.DataCallback; import org.apache.zookeeper.data.Stat; +import org.apache.zookeeper.server.quorum.QuorumStats; import org.apache.zookeeper.server.quorum.QuorumPeer.QuorumServer; import org.apache.zookeeper.server.quorum.QuorumPeer.ServerState; import org.apache.zookeeper.server.quorum.flexible.QuorumHierarchical; @@ -403,6 +404,50 @@ public class ReconfigTest extends ZKTest } @Test + public void testLeaderTimesoutOnNewQuorum() throws Exception { + qu = new QuorumUtil(1); // create 3 servers + qu.disableJMXTest = true; + qu.startAll(); + ZooKeeper[] zkArr = createHandles(qu); + + List<String> leavingServers = new ArrayList<String>(); + leavingServers.add("3"); + qu.shutdown(2); + try { + // Since we just shut down server 2, its still considered "synced" + // by the leader, which allows us to start the reconfig + // (PrepRequestProcessor checks that a quorum of the new + // config is synced before starting a reconfig). + // We try to remove server 3, which requires a quorum of {1,2,3} + // (we have that) and of {1,2}, but 2 is down so we won't get a + // quorum of new config ACKs. + zkArr[1].reconfig(null, leavingServers, null, -1, new Stat()); + Assert.fail("Reconfig should have failed since we don't have quorum of new config"); + } catch (KeeperException.ConnectionLossException e) { + // We expect leader to loose quorum of proposed config and time out + } catch (Exception e) { + Assert.fail("Should have been ConnectionLossException!"); + } + + // The leader should time out and remaining servers should go into + // LOOKING state. A new leader won't be established since that + // would require completing the reconfig, which is not possible while + // 2 is down. + Assert.assertEquals(QuorumStats.Provider.LOOKING_STATE, + qu.getPeer(1).peer.getServerState()); + Assert.assertEquals(QuorumStats.Provider.LOOKING_STATE, + qu.getPeer(3).peer.getServerState()); + + qu.restart(2); + + // Now that 2 is back up, they'll complete the reconfig removing 3 and + // can process other ops. + testServerHasConfig(zkArr[1], null, leavingServers); + testNormalOperation(zkArr[1], zkArr[2]); + closeAllHandles(zkArr); + } + + @Test public void testRemoveOneAsynchronous() throws Exception { qu = new QuorumUtil(2); qu.disableJMXTest = true;