symat commented on a change in pull request #1356: URL: https://github.com/apache/zookeeper/pull/1356#discussion_r430210333
########## File path: zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Leader.java ########## @@ -943,6 +944,7 @@ public synchronized boolean tryToCommit(Proposal p, long zxid, SocketAddress fol self.processReconfig(newQV, designatedLeader, zk.getZxid(), true); if (designatedLeader != self.getId()) { + LOG.info("Committing a reconfiguration; this leader is not the designated leader anymore, setting allowedToCommit = false"); Review comment: thanks, good idea ########## File path: zookeeper-server/src/test/java/org/apache/zookeeper/test/ReconfigExceptionTest.java ########## @@ -90,6 +90,16 @@ public void tearDown() throws Exception { @Test(timeout = 10000) public void testReconfigDisabled() throws InterruptedException { QuorumPeerConfig.setReconfigEnabled(false); + + // for thsi test we need to restart the quorum peers to get the config change, Review comment: thanks! ########## File path: zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/ReconfigRollingRestartCompatibilityTest.java ########## @@ -209,6 +209,237 @@ public void testRollingRestartWithMembershipChange() throws Exception { } } + @Test + public void testRollingRestartWithExtendedMembershipConfig() throws Exception { + // in this test we are performing rolling restart with extended quorum config, see ZOOKEEPER-3829 + + // Start a quorum with 3 members + int serverCount = 3; + String config = generateNewQuorumConfig(serverCount); + QuorumPeerTestBase.MainThread[] mt = new QuorumPeerTestBase.MainThread[serverCount]; + List<String> joiningServers = new ArrayList<>(); + for (int i = 0; i < serverCount; i++) { + mt[i] = new QuorumPeerTestBase.MainThread(i, clientPorts.get(i), config, false); + mt[i].start(); + joiningServers.add(serverAddress.get(i)); + } + for (int i = 0; i < serverCount; i++) { + assertTrue("waiting for server " + i + " being up", ClientBase.waitForServerUp("127.0.0.1:" + clientPorts.get(i), CONNECTION_TIMEOUT)); + } + for (int i = 0; i < serverCount; i++) { + verifyQuorumConfig(i, joiningServers, null); + verifyQuorumMembers(mt[i]); + } + + // Create updated config with 4 members + List<String> newServers = new ArrayList<>(joiningServers); + config = updateExistingQuorumConfig(Arrays.asList(3), new ArrayList<>()); + newServers.add(serverAddress.get(3)); + serverCount = serverAddress.size(); + assertEquals("Server count should be 4 after config update.", serverCount, 4); + + // We are adding one new server to the ensemble. The new server should be started with the new config + mt = Arrays.copyOf(mt, mt.length + 1); + mt[3] = new QuorumPeerTestBase.MainThread(3, clientPorts.get(3), config, false); + mt[3].start(); + assertTrue("waiting for server 3 being up", ClientBase.waitForServerUp("127.0.0.1:" + clientPorts.get(3), CONNECTION_TIMEOUT)); + verifyQuorumConfig(3, newServers, null); + verifyQuorumMembers(mt[3]); + + // Now we restart the first 3 servers, one-by-one with the new config + for (int i = 0; i < 3; i++) { + mt[i].shutdown(); + + assertTrue(String.format("Timeout during waiting for server %d to go down", i), + ClientBase.waitForServerDown("127.0.0.1:" + clientPorts.get(i), ClientBase.CONNECTION_TIMEOUT)); + + mt[i] = new QuorumPeerTestBase.MainThread(i, clientPorts.get(i), config, false); + mt[i].start(); + assertTrue("waiting for server " + i + " being up", ClientBase.waitForServerUp("127.0.0.1:" + clientPorts.get(i), CONNECTION_TIMEOUT)); + verifyQuorumConfig(i, newServers, null); + verifyQuorumMembers(mt[i]); + } + + // now verify that all nodes can handle traffic + for (int i = 0; i < 4; ++i) { + ZooKeeper zk = ClientBase.createZKClient("127.0.0.1:" + clientPorts.get(i)); + ReconfigTest.testNormalOperation(zk, zk, false); + } + + for (int i = 0; i < 4; ++i) { + mt[i].shutdown(); + } + } + + + @Test + public void testRollingRestartWithExtendedMembershipConfigRestartingLeaderFirst() throws Exception { + // in this test we are performing rolling restart with extended quorum config, see ZOOKEEPER-3829 + // first we start the new nodes, then restart the current leader + // this is a special case, as the old servers will not be able to join to the new nodes after they became leader + + // Start a quorum with 3 members + int serverCount = 3; + String config = generateNewQuorumConfig(serverCount); + QuorumPeerTestBase.MainThread[] mt = new QuorumPeerTestBase.MainThread[serverCount]; + List<String> joiningServers = new ArrayList<>(); + for (int i = 0; i < serverCount; i++) { + mt[i] = new QuorumPeerTestBase.MainThread(i, clientPorts.get(i), config, false); + mt[i].start(); + joiningServers.add(serverAddress.get(i)); + } + for (int i = 0; i < serverCount; i++) { + assertTrue("waiting for server " + i + " being up", ClientBase.waitForServerUp("127.0.0.1:" + clientPorts.get(i), CONNECTION_TIMEOUT)); + } + for (int i = 0; i < serverCount; i++) { + verifyQuorumConfig(i, joiningServers, null); + verifyQuorumMembers(mt[i]); + } + + // Create updated config with 5 members + List<String> newServers = new ArrayList<>(joiningServers); + config = updateExistingQuorumConfig(Arrays.asList(3, 4), new ArrayList<>()); + newServers.add(serverAddress.get(3)); + newServers.add(serverAddress.get(4)); + serverCount = serverAddress.size(); + assertEquals("Server count should be 5 after config update.", serverCount, 5); + + // We are adding two new servers to the ensemble. The new server should be started with the new config + mt = Arrays.copyOf(mt, mt.length + 2); + for (int i = 3; i < 5; i++) { + mt[i] = new QuorumPeerTestBase.MainThread(i, clientPorts.get(i), config, false); + mt[i].start(); + assertTrue("waiting for server " + i + " being up", ClientBase.waitForServerUp("127.0.0.1:" + clientPorts.get(i), CONNECTION_TIMEOUT)); + verifyQuorumConfig(i, newServers, null); + verifyQuorumMembers(mt[i]); + } + + // Now we restart the leader first. This should trigger a leader election where a newly added server becomes + // the new leader (as it has the highest id) + int oldLeaderId = -1; + for (int i = 0; i < 3; i++) { + if (mt[i].getQuorumPeer().isLeader(i)) { + oldLeaderId = i; + mt[i].shutdown(); + + assertTrue(String.format("Timeout during waiting for server %d to go down", i), + ClientBase.waitForServerDown("127.0.0.1:" + clientPorts.get(i), ClientBase.CONNECTION_TIMEOUT)); + + mt[i] = new QuorumPeerTestBase.MainThread(i, clientPorts.get(i), config, false); + mt[i].start(); + assertTrue("waiting for server " + i + " being up", ClientBase.waitForServerUp("127.0.0.1:" + clientPorts.get(i), CONNECTION_TIMEOUT)); + verifyQuorumConfig(i, newServers, null); + verifyQuorumMembers(mt[i]); Review comment: It wouldn't hurt, I will add it (although the new leader shouldn't be selected from the first three servers). Still, a 'break' would make the code easier to follow for the reader. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org