This is an automated email from the ASF dual-hosted git repository.

yongzao pushed a commit to branch Optimized-the-logic-that-Datanodes-can't-restart-before-20-seconds
in repository https://gitbox.apache.org/repos/asf/iotdb.git

commit 04e262e867df8ad4f25eb15dfa23c49cabf43828 Author: YongzaoDan <[email protected]> AuthorDate: Thu Feb 16 16:19:03 2023 +0800 Finish --- .../confignode/conf/ConfigNodeStartupCheck.java | 1 + .../manager/node/ClusterNodeStartUtils.java | 21 +------- .../it/cluster/IoTDBClusterNodeErrorStartUpIT.java | 57 ++++++---------------- .../it/cluster/IoTDBClusterNodeGetterIT.java | 1 + .../it/cluster/IoTDBClusterRestartIT.java | 1 + 5 files changed, 20 insertions(+), 61 deletions(-) diff --git a/confignode/src/main/java/org/apache/iotdb/confignode/conf/ConfigNodeStartupCheck.java b/confignode/src/main/java/org/apache/iotdb/confignode/conf/ConfigNodeStartupCheck.java index b444928dcf..27efcc534a 100644 --- a/confignode/src/main/java/org/apache/iotdb/confignode/conf/ConfigNodeStartupCheck.java +++ b/confignode/src/main/java/org/apache/iotdb/confignode/conf/ConfigNodeStartupCheck.java @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ + package org.apache.iotdb.confignode.conf; import org.apache.iotdb.commons.conf.IoTDBConstant; diff --git a/confignode/src/main/java/org/apache/iotdb/confignode/manager/node/ClusterNodeStartUtils.java b/confignode/src/main/java/org/apache/iotdb/confignode/manager/node/ClusterNodeStartUtils.java index d1a118a9f2..699b7026d6 100644 --- a/confignode/src/main/java/org/apache/iotdb/confignode/manager/node/ClusterNodeStartUtils.java +++ b/confignode/src/main/java/org/apache/iotdb/confignode/manager/node/ClusterNodeStartUtils.java @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. 
*/ + package org.apache.iotdb.confignode.manager.node; import org.apache.iotdb.common.rpc.thrift.TConfigNodeLocation; @@ -23,13 +24,11 @@ import org.apache.iotdb.common.rpc.thrift.TDataNodeConfiguration; import org.apache.iotdb.common.rpc.thrift.TDataNodeLocation; import org.apache.iotdb.common.rpc.thrift.TEndPoint; import org.apache.iotdb.common.rpc.thrift.TSStatus; -import org.apache.iotdb.commons.cluster.NodeStatus; import org.apache.iotdb.commons.cluster.NodeType; import org.apache.iotdb.commons.conf.IoTDBConstant; import org.apache.iotdb.confignode.conf.ConfigNodeConstant; import org.apache.iotdb.confignode.conf.ConfigNodeDescriptor; import org.apache.iotdb.confignode.manager.ConfigManager; -import org.apache.iotdb.confignode.manager.node.heartbeat.BaseNodeCache; import org.apache.iotdb.rpc.TSStatusCode; import java.util.ArrayList; @@ -196,24 +195,6 @@ public class ClusterNodeStartUtils { return status; } - boolean isNodeAlive; - NodeStatus nodeStatus = configManager.getNodeManager().getNodeStatusByNodeId(nodeId); - isNodeAlive = nodeStatus != null && !NodeStatus.Unknown.equals(nodeStatus); - if (isNodeAlive) { - /* Reject restart because the Node is still alive */ - status.setCode(TSStatusCode.REJECT_NODE_START.getStatusCode()); - status.setMessage( - String.format( - "Reject %s restart. Because there already exists an alive Node with the same nodeId=%d in the target cluster." - + POSSIBLE_SOLUTIONS - + "\t1. Maybe you've just shutdown this Node recently. Please wait about %s for the ConfigNode-leader to mark this Node as Unknown before retry start. You can use SQL \"show cluster details\" to find out this Node's status." - + "\n\t2. Maybe you start this Node by copying the 'data' dir of another alive Node. 
Please delete 'data' dir and retry start.", - nodeType.getNodeType(), - nodeId, - (BaseNodeCache.HEARTBEAT_TIMEOUT_TIME / 1000) + "s")); - return status; - } - boolean isTEndPointUpdated; switch (nodeType) { case ConfigNode: diff --git a/integration-test/src/test/java/org/apache/iotdb/confignode/it/cluster/IoTDBClusterNodeErrorStartUpIT.java b/integration-test/src/test/java/org/apache/iotdb/confignode/it/cluster/IoTDBClusterNodeErrorStartUpIT.java index a57a3d1570..c92bc04c2b 100644 --- a/integration-test/src/test/java/org/apache/iotdb/confignode/it/cluster/IoTDBClusterNodeErrorStartUpIT.java +++ b/integration-test/src/test/java/org/apache/iotdb/confignode/it/cluster/IoTDBClusterNodeErrorStartUpIT.java @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ + package org.apache.iotdb.confignode.it.cluster; import org.apache.iotdb.common.rpc.thrift.TConfigNodeLocation; @@ -229,8 +230,18 @@ public class IoTDBClusterNodeErrorStartUpIT { dataNodeRestartResp.getStatus().getCode()); Assert.assertTrue(dataNodeRestartResp.getStatus().getMessage().contains("whose nodeId=")); - /* Restart an alive Node */ + // Shutdown and check + EnvFactory.getEnv().shutdownConfigNode(1); + EnvFactory.getEnv().shutdownDataNode(0); + EnvFactory.getEnv() + .ensureNodeStatus( + Arrays.asList( + EnvFactory.getEnv().getConfigNodeWrapper(1), + EnvFactory.getEnv().getDataNodeWrapper(0)), + Arrays.asList(NodeStatus.Unknown, NodeStatus.Unknown)); + /* Restart and updatePeer */ + // TODO: @Itami-sho, enable this test and delete it int registeredConfigNodeId = -1; TShowClusterResp showClusterResp = client.showCluster(); for (TConfigNodeLocation configNodeLocation : showClusterResp.getConfigNodeList()) { @@ -241,16 +252,16 @@ public class IoTDBClusterNodeErrorStartUpIT { } } Assert.assertNotEquals(-1, registeredConfigNodeId); + int originPort = registeredConfigNodeWrapper.getConsensusPort(); + registeredConfigNodeWrapper.setConsensusPort(-12345); 
configNodeRestartReq = ConfigNodeTestUtils.generateTConfigNodeRestartReq( TEST_CLUSTER_NAME, registeredConfigNodeId, registeredConfigNodeWrapper); configNodeRestartStatus = client.restartConfigNode(configNodeRestartReq); Assert.assertEquals( TSStatusCode.REJECT_NODE_START.getStatusCode(), configNodeRestartStatus.getCode()); - Assert.assertTrue( - configNodeRestartStatus - .getMessage() - .contains("exists an alive Node with the same nodeId")); + Assert.assertTrue(configNodeRestartStatus.getMessage().contains("have been changed")); + registeredConfigNodeWrapper.setConsensusPort(originPort); int registeredDataNodeId = -1; showClusterResp = client.showCluster(); @@ -262,42 +273,6 @@ public class IoTDBClusterNodeErrorStartUpIT { } } Assert.assertNotEquals(-1, registeredDataNodeId); - dataNodeRestartReq = - ConfigNodeTestUtils.generateTDataNodeRestartReq( - TEST_CLUSTER_NAME, registeredDataNodeId, registeredDataNodeWrapper); - dataNodeRestartResp = client.restartDataNode(dataNodeRestartReq); - Assert.assertEquals( - TSStatusCode.REJECT_NODE_START.getStatusCode(), - dataNodeRestartResp.getStatus().getCode()); - Assert.assertTrue( - dataNodeRestartResp - .getStatus() - .getMessage() - .contains("exists an alive Node with the same nodeId")); - - // Shutdown and check - EnvFactory.getEnv().shutdownConfigNode(1); - EnvFactory.getEnv().shutdownDataNode(0); - EnvFactory.getEnv() - .ensureNodeStatus( - Arrays.asList( - EnvFactory.getEnv().getConfigNodeWrapper(1), - EnvFactory.getEnv().getDataNodeWrapper(0)), - Arrays.asList(NodeStatus.Unknown, NodeStatus.Unknown)); - - /* Restart and updatePeer */ - // TODO: @Itami-sho, enable this test and delete it - int originPort = registeredConfigNodeWrapper.getConsensusPort(); - registeredConfigNodeWrapper.setConsensusPort(-12345); - configNodeRestartReq = - ConfigNodeTestUtils.generateTConfigNodeRestartReq( - TEST_CLUSTER_NAME, registeredConfigNodeId, registeredConfigNodeWrapper); - configNodeRestartStatus = 
client.restartConfigNode(configNodeRestartReq); - Assert.assertEquals( - TSStatusCode.REJECT_NODE_START.getStatusCode(), configNodeRestartStatus.getCode()); - Assert.assertTrue(configNodeRestartStatus.getMessage().contains("have been changed")); - registeredConfigNodeWrapper.setConsensusPort(originPort); - originPort = registeredDataNodeWrapper.getInternalPort(); registeredDataNodeWrapper.setInternalPort(-12345); dataNodeRestartReq = diff --git a/integration-test/src/test/java/org/apache/iotdb/confignode/it/cluster/IoTDBClusterNodeGetterIT.java b/integration-test/src/test/java/org/apache/iotdb/confignode/it/cluster/IoTDBClusterNodeGetterIT.java index 1c9b570859..351d69a303 100644 --- a/integration-test/src/test/java/org/apache/iotdb/confignode/it/cluster/IoTDBClusterNodeGetterIT.java +++ b/integration-test/src/test/java/org/apache/iotdb/confignode/it/cluster/IoTDBClusterNodeGetterIT.java @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ + package org.apache.iotdb.confignode.it.cluster; import org.apache.iotdb.common.rpc.thrift.TConfigNodeLocation; diff --git a/integration-test/src/test/java/org/apache/iotdb/confignode/it/cluster/IoTDBClusterRestartIT.java b/integration-test/src/test/java/org/apache/iotdb/confignode/it/cluster/IoTDBClusterRestartIT.java index fd6e275c68..66e44b9362 100644 --- a/integration-test/src/test/java/org/apache/iotdb/confignode/it/cluster/IoTDBClusterRestartIT.java +++ b/integration-test/src/test/java/org/apache/iotdb/confignode/it/cluster/IoTDBClusterRestartIT.java @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ + package org.apache.iotdb.confignode.it.cluster; import org.apache.iotdb.common.rpc.thrift.TConfigNodeLocation;
