[
https://issues.apache.org/jira/browse/ZOOKEEPER-2819?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16070709#comment-16070709
]
ASF GitHub Bot commented on ZOOKEEPER-2819:
-------------------------------------------
Github user shralex commented on a diff in the pull request:
https://github.com/apache/zookeeper/pull/292#discussion_r125125684
--- Diff:
src/java/test/org/apache/zookeeper/server/quorum/ReconfigRollingRestartCompatibilityTest.java
---
@@ -0,0 +1,275 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zookeeper.server.quorum;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Set;
+import java.util.List;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.ArrayList;
+
+import org.apache.zookeeper.WatchedEvent;
+import org.apache.zookeeper.ZooKeeper;
+import org.apache.zookeeper.CreateMode;
+import org.apache.zookeeper.ZooDefs;
+import org.apache.zookeeper.PortAssignment;
+import org.apache.zookeeper.KeeperException;
+import org.apache.zookeeper.Watcher;
+import org.apache.zookeeper.test.ClientBase;
+import org.junit.Assert;
+import org.junit.Test;
+
+import static org.apache.zookeeper.test.ClientBase.CONNECTION_TIMEOUT;
+
+/**
+ * ReconfigRollingRestartCompatibilityTest - we want to make sure that
users
+ * can continue using the rolling restart approach when reconfig feature
is disabled.
+ * It is important to stay compatible with rolling restart because dynamic
reconfig
+ * has its limitation: it requires a quorum of server to work. When no
quorum can be formed,
+ * rolling restart is the only approach to reconfigure the ensemble (e.g.
removing bad nodes
+ * such that a new quorum with smaller number of nodes can be formed.).
+ *
+ * See ZOOKEEPER-2819 for more details.
+ */
+public class ReconfigRollingRestartCompatibilityTest extends
QuorumPeerTestBase {
+ private static final String backupFileName = "zoo.cfg.bak";
+
+ Map<Integer, Integer> clientPorts = new HashMap<>(5);
+ Map<Integer, String> serverAddress = new HashMap<>(5);
+
+ private String generateNewQuorumConfig(int serverCount) {
+ StringBuilder sb = new StringBuilder();
+ String server;
+ for (int i = 0; i < serverCount; i++) {
+ clientPorts.put(i, PortAssignment.unique());
+ server = "server." + i + "=localhost:" +
PortAssignment.unique()
+ + ":" + PortAssignment.unique() +
":participant;localhost:"
+ + clientPorts.get(i);
+ serverAddress.put(i, server);
+ sb.append(server + "\n");
+ }
+ return sb.toString();
+ }
+
+ private String updateExistingQuorumConfig(List<Integer> sidsToAdd,
List<Integer> sidsToRemove) {
+ StringBuilder sb = new StringBuilder();
+ for (Integer sid : sidsToAdd) {
+ clientPorts.put(sid, PortAssignment.unique());
+ serverAddress.put(sid, "server." + sid + "=localhost:" +
PortAssignment.unique()
+ + ":" + PortAssignment.unique() +
":participant;localhost:"
+ + clientPorts.get(sid));
+ }
+
+ for (Integer sid : sidsToRemove) {
+ clientPorts.remove(sid);
+ serverAddress.remove(sid);
+ }
+
+ for (String server : serverAddress.values()) {
+ sb.append(server + "\n");
+ }
+
+ return sb.toString();
+ }
+
+ @Test(timeout = 60000)
+ // Verify no zoo.cfg.dynamic and zoo.cfg.bak files existing locally
+ // when reconfig feature flag is off by default.
+ public void testNoLocalDynamicConfigAndBackupFiles()
+ throws InterruptedException, IOException {
+ int serverCount = 3;
+ String config = generateNewQuorumConfig(serverCount);
+ QuorumPeerTestBase.MainThread mt[] = new
QuorumPeerTestBase.MainThread[serverCount];
+ String[] staticFileContent = new String[serverCount];
+
+ for (int i = 0; i < serverCount; i++) {
+ mt[i] = new QuorumPeerTestBase.MainThread(i,
clientPorts.get(i),
+ config, false);
+ mt[i].start();
+ }
+
+ for (int i = 0; i < serverCount; i++) {
+ Assert.assertTrue("waiting for server " + i + " being up",
+ ClientBase.waitForServerUp("127.0.0.1:" +
clientPorts.get(i),
+ CONNECTION_TIMEOUT));
+ Assert.assertNull("static file backup (zoo.cfg.bak) shouldn't
exist!",
+ mt[i].getFileByName(backupFileName));
+ Assert.assertNull("dynamic configuration file
(zoo.cfg.dynamic.*) shouldn't exist!",
+
mt[i].getFileByName(mt[i].getQuorumPeer().getNextDynamicConfigFilename()));
+ staticFileContent[i] =
Files.readAllLines(mt[i].confFile.toPath(), StandardCharsets.UTF_8).toString();
+ Assert.assertTrue("static config file should contain server
entry " + serverAddress.get(i),
+ staticFileContent[i].contains(serverAddress.get(i)));
+ }
+
+ for (int i = 0; i < serverCount; i++) {
+ mt[i].shutdown();
+ }
+ }
+
+ @Test(timeout = 60000)
+ // This test simulate the usual rolling restart with no membership
change:
+ // 1. A node is shutdown first (e.g. to upgrade software, or hardware,
or cleanup local data.).
+ // 2. After upgrade, start the node.
+ // 3. Do this for every node, one at a time.
+ public void testRollingRestartWithoutMembershipChange()
+ throws IOException, InterruptedException, KeeperException {
+ int serverCount = 3;
+ String config = generateNewQuorumConfig(serverCount);
+ QuorumPeerTestBase.MainThread mt[] = new
QuorumPeerTestBase.MainThread[serverCount];
+ for (int i = 0; i < serverCount; ++i) {
+ mt[i] = new QuorumPeerTestBase.MainThread(i,
clientPorts.get(i),
+ config, false);
+ mt[i].start();
+ }
+
+ for (int i = 0; i < serverCount; ++i) {
+ Assert.assertTrue("waiting for server " + i + " being up",
+ ClientBase.waitForServerUp("127.0.0.1:" +
clientPorts.get(i),
+ CONNECTION_TIMEOUT));
+ }
+
+ for (int i = 0; i < serverCount; ++i) {
+ mt[i].shutdown();
+ mt[i].start();
+ verifyQuorum(i);
--- End diff --
two comments:
1. is calling start immediately after shutdown effective ? I don't remember
what shutdown is doing, but if its setting a flag and a thread periodically
checks it, it may take time to take effect.
2. nothing is changing about the config as far as I can see. Why would this
ever fail ?
> Changing membership configuration via rolling restart does not work on 3.5.x.
> -----------------------------------------------------------------------------
>
> Key: ZOOKEEPER-2819
> URL: https://issues.apache.org/jira/browse/ZOOKEEPER-2819
> Project: ZooKeeper
> Issue Type: Bug
> Components: quorum, server
> Affects Versions: 3.5.0, 3.5.1, 3.5.2, 3.5.3
> Reporter: Michael Han
> Assignee: Michael Han
> Priority: Critical
>
> In 3.5.x there is no easy way of changing the membership config using rolling
> restarts because of the introduction of dynamic reconfig feature in
> ZOOKEEPER-107, which automatically manages membership configuration
> parameters.
> ZOOKEEPER-2014 introduced a reconfigEnabled flag to turn on / off the
> reconfig feature. We can use same flag and when it sets to false, it should
> disable both in memory and on disk updates of membership configuration
> information, besides disabling the reconfig commands on CLI which
> ZOOKEEPER-2014 already did, so users can continue using rolling restarts if
> needed.
> We should also document explicitly the support of membership changes via
> rolling restarts will be deprecated at what release time frame and promote
> reconfig as the replacement.
> The problem was raised at user mailing list by Guillermo Vega-Toro, reference
> thread:
> http://zookeeper-user.578899.n2.nabble.com/How-to-add-nodes-to-a-Zookeeper-3-5-3-beta-ensemble-with-reconfigEnabled-false-td7583138.html
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)