This is an automated email from the ASF dual-hosted git repository.
sodonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 737297e HDDS-5719. Reduce number of mini-clusters needed for decommission tests (#2617)
737297e is described below
commit 737297e0ea9d6a82c81bf42546a685f71af2db8d
Author: Stephen O'Donnell <[email protected]>
AuthorDate: Wed Sep 8 16:44:03 2021 +0100
HDDS-5719. Reduce number of mini-clusters needed for decommission tests
(#2617)
---
.../scm/node/TestDecommissionAndMaintenance.java | 90 +++++++---------------
1 file changed, 29 insertions(+), 61 deletions(-)
diff --git
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDecommissionAndMaintenance.java
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDecommissionAndMaintenance.java
index 5cc0570..658af80 100644
---
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDecommissionAndMaintenance.java
+++
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDecommissionAndMaintenance.java
@@ -129,7 +129,7 @@ public class TestDecommissionAndMaintenance {
MiniOzoneCluster.Builder builder = MiniOzoneCluster.newBuilder(conf)
.setNumDatanodes(numOfDatanodes);
- clusterProvider = new MiniOzoneClusterProvider(conf, builder, 11);
+ clusterProvider = new MiniOzoneClusterProvider(conf, builder, 8);
}
@AfterClass
@@ -157,8 +157,9 @@ public class TestDecommissionAndMaintenance {
@Test
// Decommissioning a node with open pipelines should close the pipelines
// and hence the open containers and then the containers should be replicated
- // by the replication manager.
- public void testNodeWithOpenPipelineCanBeDecommissioned()
+ // by the replication manager. After the node completes decommission, it can
+ // be recommissioned.
+ public void testNodeWithOpenPipelineCanBeDecommissionedAndRecommissioned()
throws Exception {
// Generate some data on the empty cluster to create some containers
generateData(20, "key", ReplicationFactor.THREE, ReplicationType.RATIS);
@@ -185,43 +186,26 @@ public class TestDecommissionAndMaintenance {
waitForContainerReplicas(container, 4);
// Stop the decommissioned DN
+ int dnIndex = cluster.getHddsDatanodeIndex(toDecommission);
cluster.shutdownHddsDatanode(toDecommission);
waitForDnToReachHealthState(toDecommission, DEAD);
// Now the decommissioned node is dead, we should have
// 3 replicas for the tracked container.
waitForContainerReplicas(container, 3);
- }
- @Test
- // After a SCM restart, it will have forgotten all the Operational states.
- // However the state will have been persisted on the DNs. Therefore on initial
- // registration, the DN operationalState is the source of truth and SCM should
- // be updated to reflect that.
- public void testDecommissionedStateReinstatedAfterSCMRestart()
- throws Exception {
- // Decommission any node and wait for it to be DECOMMISSIONED
- generateData(20, "key", ReplicationFactor.THREE, ReplicationType.RATIS);
- DatanodeDetails dn = nm.getAllNodes().get(0);
- scmClient.decommissionNodes(Arrays.asList(getDNHostAndPort(dn)));
- waitForDnToReachOpState(dn, DECOMMISSIONED);
-
- cluster.restartStorageContainerManager(true);
- setManagers();
- DatanodeDetails newDn = nm.getNodeByUuid(dn.getUuid().toString());
-
- // On initial registration, the DN should report its operational state
- // and if it is decommissioned, that should be updated in the NodeStatus
- waitForDnToReachOpState(newDn, DECOMMISSIONED);
- // Also confirm the datanodeDetails correctly reflect the operational
- // state.
- waitForDnToReachPersistedOpState(newDn, DECOMMISSIONED);
+ cluster.restartHddsDatanode(dnIndex, true);
+ scmClient.recommissionNodes(Arrays.asList(
+ getDNHostAndPort(toDecommission)));
+ waitForDnToReachOpState(toDecommission, IN_SERVICE);
+ waitForDnToReachPersistedOpState(toDecommission, IN_SERVICE);
}
@Test
// If a node has not yet completed decommission and SCM is restarted, then
// when it re-registers it should re-enter the decommission workflow and
- // complete decommissioning.
+ // complete decommissioning. If SCM is restarted after decommission is complete
+ // then SCM should learn of the decommissioned DN when it registers.
public void testDecommissioningNodesCompleteDecommissionOnSCMRestart()
throws Exception {
// First stop the replicationManager so nodes marked for decommission cannot
@@ -247,6 +231,18 @@ public class TestDecommissionAndMaintenance {
DatanodeDetails newDn = nm.getNodeByUuid(dn.getUuid().toString());
waitForDnToReachOpState(newDn, DECOMMISSIONED);
waitForDnToReachPersistedOpState(newDn, DECOMMISSIONED);
+
+ // Now the node is decommissioned, so restart SCM again
+ cluster.restartStorageContainerManager(true);
+ setManagers();
+ newDn = nm.getNodeByUuid(dn.getUuid().toString());
+
+ // On initial registration, the DN should report its operational state
+ // and if it is decommissioned, that should be updated in the NodeStatus
+ waitForDnToReachOpState(newDn, DECOMMISSIONED);
+ // Also confirm the datanodeDetails correctly reflect the operational
+ // state.
+ waitForDnToReachPersistedOpState(newDn, DECOMMISSIONED);
}
@Test
@@ -284,27 +280,12 @@ public class TestDecommissionAndMaintenance {
}
@Test
- // A node which is decommissioning or decommissioned can be move back to
- // IN_SERVICE.
- public void testDecommissionedNodeCanBeRecommissioned() throws Exception {
- generateData(20, "key", ReplicationFactor.THREE, ReplicationType.RATIS);
- DatanodeDetails dn = nm.getAllNodes().get(0);
- scmClient.decommissionNodes(Arrays.asList(getDNHostAndPort(dn)));
-
- GenericTestUtils.waitFor(
- () -> !dn.getPersistedOpState()
- .equals(IN_SERVICE),
- 200, 30000);
-
- scmClient.recommissionNodes(Arrays.asList(getDNHostAndPort(dn)));
- waitForDnToReachOpState(dn, IN_SERVICE);
- waitForDnToReachPersistedOpState(dn, IN_SERVICE);
- }
-
- @Test
// When putting a single node into maintenance, its pipelines should be closed
// but no new replicas should be created and the node should transition into
- // maintenance
+ // maintenance.
+ // After a restart, the DN should keep the maintenance state.
+ // If the DN is recommissioned while stopped, it should get the recommissioned
+ // state when it re-registers.
public void testSingleNodeWithOpenPipelineCanGotoMaintenance()
throws Exception {
// Generate some data on the empty cluster to create some containers
@@ -343,21 +324,8 @@ public class TestDecommissionAndMaintenance {
DatanodeDetails newDN = nm.getNodeByUuid(dn.getUuid().toString());
waitForDnToReachHealthState(newDN, HEALTHY);
waitForDnToReachPersistedOpState(newDN, IN_MAINTENANCE);
- }
-
- @Test
- // After a node enters maintenance and is stopped, it can be recommissioned in
- // SCM. Then when it is restarted, it should go back to IN_SERVICE and have
- // that persisted on the DN.
- public void testStoppedMaintenanceNodeTakesScmStateOnRestart()
- throws Exception {
- // Put a node into maintenance and wait for it to complete
- generateData(20, "key", ReplicationFactor.THREE, ReplicationType.RATIS);
- DatanodeDetails dn = nm.getAllNodes().get(0);
- scmClient.startMaintenanceNodes(Arrays.asList(getDNHostAndPort(dn)), 0);
- waitForDnToReachOpState(dn, IN_MAINTENANCE);
- waitForDnToReachPersistedOpState(dn, IN_MAINTENANCE);
+ // Stop the DN and wait for it to go dead.
int dnIndex = cluster.getHddsDatanodeIndex(dn);
cluster.shutdownHddsDatanode(dnIndex);
waitForDnToReachHealthState(dn, DEAD);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]