[ https://issues.apache.org/jira/browse/ZOOKEEPER-2819?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16070704#comment-16070704 ]
ASF GitHub Bot commented on ZOOKEEPER-2819: ------------------------------------------- Github user shralex commented on a diff in the pull request: https://github.com/apache/zookeeper/pull/292#discussion_r125127752 --- Diff: src/java/test/org/apache/zookeeper/server/quorum/ReconfigRollingRestartCompatibilityTest.java --- @@ -0,0 +1,275 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.zookeeper.server.quorum; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.util.Arrays; +import java.util.Map; +import java.util.Set; +import java.util.List; +import java.util.HashMap; +import java.util.HashSet; +import java.util.ArrayList; + +import org.apache.zookeeper.WatchedEvent; +import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.ZooDefs; +import org.apache.zookeeper.PortAssignment; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.Watcher; +import org.apache.zookeeper.test.ClientBase; +import org.junit.Assert; +import org.junit.Test; + +import static org.apache.zookeeper.test.ClientBase.CONNECTION_TIMEOUT; + +/** + * ReconfigRollingRestartCompatibilityTest - we want to make sure that users + * can continue using the rolling restart approach when reconfig feature is disabled. + * It is important to stay compatible with rolling restart because dynamic reconfig + * has its limitation: it requires a quorum of server to work. When no quorum can be formed, + * rolling restart is the only approach to reconfigure the ensemble (e.g. removing bad nodes + * such that a new quorum with smaller number of nodes can be formed.). + * + * See ZOOKEEPER-2819 for more details. + */ +public class ReconfigRollingRestartCompatibilityTest extends QuorumPeerTestBase { + private static final String backupFileName = "zoo.cfg.bak"; + + Map<Integer, Integer> clientPorts = new HashMap<>(5); + Map<Integer, String> serverAddress = new HashMap<>(5); + + private String generateNewQuorumConfig(int serverCount) { + StringBuilder sb = new StringBuilder(); + String server; + for (int i = 0; i < serverCount; i++) { + clientPorts.put(i, PortAssignment.unique()); + server = "server." + i + "=localhost:" + PortAssignment.unique() + + ":" + PortAssignment.unique() + ":participant;localhost:" + + clientPorts.get(i); + serverAddress.put(i, server); + sb.append(server + "\n"); + } + return sb.toString(); + } + + private String updateExistingQuorumConfig(List<Integer> sidsToAdd, List<Integer> sidsToRemove) { + StringBuilder sb = new StringBuilder(); + for (Integer sid : sidsToAdd) { + clientPorts.put(sid, PortAssignment.unique()); + serverAddress.put(sid, "server." + sid + "=localhost:" + PortAssignment.unique() + + ":" + PortAssignment.unique() + ":participant;localhost:" + + clientPorts.get(sid)); + } + + for (Integer sid : sidsToRemove) { + clientPorts.remove(sid); + serverAddress.remove(sid); + } + + for (String server : serverAddress.values()) { + sb.append(server + "\n"); + } + + return sb.toString(); + } + + @Test(timeout = 60000) + // Verify no zoo.cfg.dynamic and zoo.cfg.bak files existing locally + // when reconfig feature flag is off by default. + public void testNoLocalDynamicConfigAndBackupFiles() + throws InterruptedException, IOException { + int serverCount = 3; + String config = generateNewQuorumConfig(serverCount); + QuorumPeerTestBase.MainThread mt[] = new QuorumPeerTestBase.MainThread[serverCount]; + String[] staticFileContent = new String[serverCount]; + + for (int i = 0; i < serverCount; i++) { + mt[i] = new QuorumPeerTestBase.MainThread(i, clientPorts.get(i), + config, false); + mt[i].start(); + } + + for (int i = 0; i < serverCount; i++) { + Assert.assertTrue("waiting for server " + i + " being up", + ClientBase.waitForServerUp("127.0.0.1:" + clientPorts.get(i), + CONNECTION_TIMEOUT)); + Assert.assertNull("static file backup (zoo.cfg.bak) shouldn't exist!", + mt[i].getFileByName(backupFileName)); + Assert.assertNull("dynamic configuration file (zoo.cfg.dynamic.*) shouldn't exist!", + mt[i].getFileByName(mt[i].getQuorumPeer().getNextDynamicConfigFilename())); + staticFileContent[i] = Files.readAllLines(mt[i].confFile.toPath(), StandardCharsets.UTF_8).toString(); + Assert.assertTrue("static config file should contain server entry " + serverAddress.get(i), + staticFileContent[i].contains(serverAddress.get(i))); + } + + for (int i = 0; i < serverCount; i++) { + mt[i].shutdown(); + } + } + + @Test(timeout = 60000) + // This test simulate the usual rolling restart with no membership change: + // 1. A node is shutdown first (e.g. to upgrade software, or hardware, or cleanup local data.). + // 2. After upgrade, start the node. + // 3. Do this for every node, one at a time. + public void testRollingRestartWithoutMembershipChange() + throws IOException, InterruptedException, KeeperException { + int serverCount = 3; + String config = generateNewQuorumConfig(serverCount); + QuorumPeerTestBase.MainThread mt[] = new QuorumPeerTestBase.MainThread[serverCount]; + for (int i = 0; i < serverCount; ++i) { + mt[i] = new QuorumPeerTestBase.MainThread(i, clientPorts.get(i), + config, false); + mt[i].start(); + } + + for (int i = 0; i < serverCount; ++i) { + Assert.assertTrue("waiting for server " + i + " being up", + ClientBase.waitForServerUp("127.0.0.1:" + clientPorts.get(i), + CONNECTION_TIMEOUT)); + } + + for (int i = 0; i < serverCount; ++i) { + mt[i].shutdown(); + mt[i].start(); + verifyQuorum(i); + verifyConfig(mt[i]); + } + + for (int i = 0; i < serverCount; i++) { + mt[i].shutdown(); + } + } + + @Test(timeout = 90000) + // This test simulate the use case of change of membership through rolling + // restart. For a 3 node ensemble we expand it to a 5 node ensemble, verify + // during the process each node has the expected configuration setting pushed + // via updating local zoo.cfg file. + public void testRollingRestartWithMembershipChange() + throws IOException, InterruptedException, KeeperException { + int serverCount = 3; + String config = generateNewQuorumConfig(serverCount); + QuorumPeerTestBase.MainThread mt[] = new QuorumPeerTestBase.MainThread[serverCount]; + for (int i = 0; i < serverCount; ++i) { + mt[i] = new QuorumPeerTestBase.MainThread(i, clientPorts.get(i), + config, false); + mt[i].start(); + } + + for (int i = 0; i < serverCount; ++i) { + Assert.assertTrue("waiting for server " + i + " being up", + ClientBase.waitForServerUp("127.0.0.1:" + clientPorts.get(i), + CONNECTION_TIMEOUT)); + } + + for (int i = 0; i < serverCount; ++i) { + verifyQuorum(i); + verifyConfig(mt[i]); + } + + Map<Integer, String> oldServerAddress = new HashMap<>(serverAddress); + config = updateExistingQuorumConfig(Arrays.asList(3, 4), new ArrayList<Integer>()); + serverCount = serverAddress.size(); + Assert.assertEquals("Server count should be 5 after config update.", serverCount, 5); + + mt = Arrays.copyOf(mt, mt.length + 2); --- End diff -- Please add a comment here to say that the new servers should have a config with all five servers, and the old servers a config with 3 servers. > Changing membership configuration via rolling restart does not work on 3.5.x. > ----------------------------------------------------------------------------- > > Key: ZOOKEEPER-2819 > URL: https://issues.apache.org/jira/browse/ZOOKEEPER-2819 > Project: ZooKeeper > Issue Type: Bug > Components: quorum, server > Affects Versions: 3.5.0, 3.5.1, 3.5.2, 3.5.3 > Reporter: Michael Han > Assignee: Michael Han > Priority: Critical > > In 3.5.x there is no easy way of changing the membership config using rolling > restarts because of the introduction of dynamic reconfig feature in > ZOOKEEPER-107, which automatically manages membership configuration > parameters. > ZOOKEEPER-2014 introduced a reconfigEnabled flag to turn on / off the > reconfig feature. We can use same flag and when it sets to false, it should > disable both in memory and on disk updates of membership configuration > information, besides disabling the reconfig commands on CLI which > ZOOKEEPER-2014 already did, so users can continue using rolling restarts if > needed. > We should also document explicitly the support of membership changes via > rolling restarts will be deprecated at what release time frame and promote > reconfig as the replacement. > The problem was raised at user mailing list by Guillermo Vega-Toro, reference > thread: > http://zookeeper-user.578899.n2.nabble.com/How-to-add-nodes-to-a-Zookeeper-3-5-3-beta-ensemble-with-reconfigEnabled-false-td7583138.html -- This message was sent by Atlassian JIRA (v6.4.14#64029)