This is an automated email from the ASF dual-hosted git repository.
sodonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 737297e HDDS-5719. Reduce number of mini-clusters needed for decommission tests (#2617)
737297e is described below
commit 737297e0ea9d6a82c81bf42546a685f71af2db8d
Author: Stephen O'Donnell <[email protected]>
AuthorDate: Wed Sep 8 16:44:03 2021 +0100
HDDS-5719. Reduce number of mini-clusters needed for decommission tests
(#2617)
---
.../scm/node/TestDecommissionAndMaintenance.java | 90 +++++++---------------
1 file changed, 29 insertions(+), 61 deletions(-)
diff --git
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDecommissionAndMaintenance.java
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDecommissionAndMaintenance.java
index 5cc0570..658af80 100644
---
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDecommissionAndMaintenance.java
+++
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDecommissionAndMaintenance.java
@@ -129,7 +129,7 @@ public class TestDecommissionAndMaintenance {
MiniOzoneCluster.Builder builder = MiniOzoneCluster.newBuilder(conf)
.setNumDatanodes(numOfDatanodes);
- clusterProvider = new MiniOzoneClusterProvider(conf, builder, 11);
+ clusterProvider = new MiniOzoneClusterProvider(conf, builder, 8);
}
@AfterClass
@@ -157,8 +157,9 @@ public class TestDecommissionAndMaintenance {
@Test
// Decommissioning a node with open pipelines should close the pipelines
// and hence the open containers and then the containers should be replicated
- // by the replication manager.
- public void testNodeWithOpenPipelineCanBeDecommissioned()
+ // by the replication manager. After the node completes decommission, it can
+ // be recommissioned.
+ public void testNodeWithOpenPipelineCanBeDecommissionedAndRecommissioned()
throws Exception {
// Generate some data on the empty cluster to create some containers
generateData(20, "key", ReplicationFactor.THREE, ReplicationType.RATIS);
@@ -185,43 +186,26 @@ public class TestDecommissionAndMaintenance {
waitForContainerReplicas(container, 4);
// Stop the decommissioned DN
+ int dnIndex = cluster.getHddsDatanodeIndex(toDecommission);
cluster.shutdownHddsDatanode(toDecommission);
waitForDnToReachHealthState(toDecommission, DEAD);
// Now the decommissioned node is dead, we should have
// 3 replicas for the tracked container.
waitForContainerReplicas(container, 3);
- }
- @Test
- // After a SCM restart, it will have forgotten all the Operational states.
- // However the state will have been persisted on the DNs. Therefore on initial
- // registration, the DN operationalState is the source of truth and SCM should
- // be updated to reflect that.
- public void testDecommissionedStateReinstatedAfterSCMRestart()
- throws Exception {
- // Decommission any node and wait for it to be DECOMMISSIONED
- generateData(20, "key", ReplicationFactor.THREE, ReplicationType.RATIS);
- DatanodeDetails dn = nm.getAllNodes().get(0);
- scmClient.decommissionNodes(Arrays.asList(getDNHostAndPort(dn)));
- waitForDnToReachOpState(dn, DECOMMISSIONED);
-
- cluster.restartStorageContainerManager(true);
- setManagers();
- DatanodeDetails newDn = nm.getNodeByUuid(dn.getUuid().toString());
-
- // On initial registration, the DN should report its operational state
- // and if it is decommissioned, that should be updated in the NodeStatus
- waitForDnToReachOpState(newDn, DECOMMISSIONED);
- // Also confirm the datanodeDetails correctly reflect the operational
- // state.
- waitForDnToReachPersistedOpState(newDn, DECOMMISSIONED);
+ cluster.restartHddsDatanode(dnIndex, true);
+ scmClient.recommissionNodes(Arrays.asList(
+ getDNHostAndPort(toDecommission)));
+ waitForDnToReachOpState(toDecommission, IN_SERVICE);
+ waitForDnToReachPersistedOpState(toDecommission, IN_SERVICE);
}
@Test
// If a node has not yet completed decommission and SCM is restarted, then
// when it re-registers it should re-enter the decommission workflow and
- // complete decommissioning.
+ // complete decommissioning. If SCM is restarted after decommission is complete
+ // then SCM should learn of the decommissioned DN when it registers.
public void testDecommissioningNodesCompleteDecommissionOnSCMRestart()
throws Exception {
// First stop the replicationManager so nodes marked for decommission cannot
@@ -247,6 +231,18 @@ public class TestDecommissionAndMaintenance {
DatanodeDetails newDn = nm.getNodeByUuid(dn.getUuid().toString());
waitForDnToReachOpState(newDn, DECOMMISSIONED);
waitForDnToReachPersistedOpState(newDn, DECOMMISSIONED);
+
+ // Now the node is decommissioned, so restart SCM again
+ cluster.restartStorageContainerManager(true);
+ setManagers();
+ newDn = nm.getNodeByUuid(dn.getUuid().toString());
+
+ // On initial registration, the DN should report its operational state
+ // and if it is decommissioned, that should be updated in the NodeStatus
+ waitForDnToReachOpState(newDn, DECOMMISSIONED);
+ // Also confirm the datanodeDetails correctly reflect the operational
+ // state.
+ waitForDnToReachPersistedOpState(newDn, DECOMMISSIONED);
}
@Test
@@ -284,27 +280,12 @@ public class TestDecommissionAndMaintenance {
}
@Test
- // A node which is decommissioning or decommissioned can be move back to
- // IN_SERVICE.
- public void testDecommissionedNodeCanBeRecommissioned() throws Exception {
- generateData(20, "key", ReplicationFactor.THREE, ReplicationType.RATIS);
- DatanodeDetails dn = nm.getAllNodes().get(0);
- scmClient.decommissionNodes(Arrays.asList(getDNHostAndPort(dn)));
-
- GenericTestUtils.waitFor(
- () -> !dn.getPersistedOpState()
- .equals(IN_SERVICE),
- 200, 30000);
-
- scmClient.recommissionNodes(Arrays.asList(getDNHostAndPort(dn)));
- waitForDnToReachOpState(dn, IN_SERVICE);
- waitForDnToReachPersistedOpState(dn, IN_SERVICE);
- }
-
- @Test
// When putting a single node into maintenance, its pipelines should be closed
// but no new replicas should be created and the node should transition into
- // maintenance
+ // maintenance.
+ // After a restart, the DN should keep the maintenance state.
+ // If the DN is recommissioned while stopped, it should get the recommissioned
+ // state when it re-registers.
public void testSingleNodeWithOpenPipelineCanGotoMaintenance()
throws Exception {
// Generate some data on the empty cluster to create some containers
@@ -343,21 +324,8 @@ public class TestDecommissionAndMaintenance {
DatanodeDetails newDN = nm.getNodeByUuid(dn.getUuid().toString());
waitForDnToReachHealthState(newDN, HEALTHY);
waitForDnToReachPersistedOpState(newDN, IN_MAINTENANCE);
- }
-
- @Test
- // After a node enters maintenance and is stopped, it can be recommissioned in
- // SCM. Then when it is restarted, it should go back to IN_SERVICE and have
- // that persisted on the DN.
- public void testStoppedMaintenanceNodeTakesScmStateOnRestart()
- throws Exception {
- // Put a node into maintenance and wait for it to complete
- generateData(20, "key", ReplicationFactor.THREE, ReplicationType.RATIS);
- DatanodeDetails dn = nm.getAllNodes().get(0);
- scmClient.startMaintenanceNodes(Arrays.asList(getDNHostAndPort(dn)), 0);
- waitForDnToReachOpState(dn, IN_MAINTENANCE);
- waitForDnToReachPersistedOpState(dn, IN_MAINTENANCE);
+ // Stop the DN and wait for it to go dead.
int dnIndex = cluster.getHddsDatanodeIndex(dn);
cluster.shutdownHddsDatanode(dnIndex);
waitForDnToReachHealthState(dn, DEAD);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]