siddhantsangwan commented on code in PR #6367:
URL: https://github.com/apache/ozone/pull/6367#discussion_r1546051331


##########
hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeDecommissionManager.java:
##########
@@ -369,10 +410,208 @@ public void testNodeDecommissionManagerOnBecomeLeader() 
throws Exception {
     assertEquals(decom.getMonitor().getTrackedNodes().size(), 3);
   }
 
-  private SCMNodeManager createNodeManager(OzoneConfiguration config)
-      throws IOException, AuthenticationException {
-    scm = HddsTestUtils.getScm(config);
-    return (SCMNodeManager) scm.getScmNodeManager();
+  @Test
+  public void testInsufficientNodeDecommissionThrowsExceptionForRatis() throws
+      NodeNotFoundException, IOException {
+    when(containerManager.getContainer(any(ContainerID.class)))
+        .thenAnswer(invocation -> getMockContainer(RatisReplicationConfig
+                .getInstance(HddsProtos.ReplicationFactor.THREE), 
(ContainerID)invocation.getArguments()[0]));
+    List<DatanodeAdminError> error;
+    List<DatanodeDetails> dns = new ArrayList<>();
+
+    for (int i = 0; i < 5; i++) {
+      DatanodeDetails dn = MockDatanodeDetails.randomDatanodeDetails();
+      dns.add(dn);
+      nodeManager.register(dn, null, null);
+    }
+
+    Set<ContainerID> idsRatis = new HashSet<>();
+    for (int i = 0; i < 5; i++) {
+      ContainerInfo container = containerManager.allocateContainer(
+          
RatisReplicationConfig.getInstance(HddsProtos.ReplicationFactor.THREE), 
"admin");
+      idsRatis.add(container.containerID());
+    }
+
+    for (DatanodeDetails dn  : nodeManager.getAllNodes().subList(0, 3)) {
+      nodeManager.setContainers(dn, idsRatis);
+    }
+
+    error = decom.decommissionNodes(Arrays.asList(dns.get(1).getIpAddress(),
+        dns.get(2).getIpAddress(), dns.get(3).getIpAddress(), 
dns.get(4).getIpAddress()), false);
+    assertTrue(error.get(0).getHostname().contains("AllHosts"));
+
+    error = decom.decommissionNodes(Arrays.asList(dns.get(1).getIpAddress(),
+        dns.get(2).getIpAddress(), dns.get(3).getIpAddress(), 
dns.get(4).getIpAddress()), true);
+    assertTrue(error.size() == 0);

Review Comment:
   IntelliJ: `'assertTrue()' can be simplified to 'assertEquals()'`, i.e. `assertEquals(0, error.size())`.



##########
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java:
##########
@@ -368,6 +388,63 @@ public synchronized void startDecommission(DatanodeDetails 
dn)
     }
   }
 
+  private synchronized boolean 
checkIfDecommissionPossible(List<DatanodeDetails> dns, List<DatanodeAdminError> 
errors) {
+    int numDecom = dns.size();
+    List<DatanodeDetails> validDns = dns.stream().collect(Collectors.toList());
+    Collections.copy(validDns, dns);

Review Comment:
   These two lines appear to do the same thing, so one of them is redundant. Also, 
IntelliJ suggests
   ```
   List<DatanodeDetails> validDns = new ArrayList<>(dns);
   ```



##########
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java:
##########
@@ -368,6 +388,63 @@ public synchronized void startDecommission(DatanodeDetails 
dn)
     }
   }
 
+  private synchronized boolean 
checkIfDecommissionPossible(List<DatanodeDetails> dns, List<DatanodeAdminError> 
errors) {
+    int numDecom = dns.size();
+    List<DatanodeDetails> validDns = dns.stream().collect(Collectors.toList());
+    Collections.copy(validDns, dns);
+    int inServiceTotal = 
nodeManager.getNodeCount(NodeStatus.inServiceHealthy());
+    for (DatanodeDetails dn : dns) {
+      try {
+        NodeStatus nodeStatus = getNodeStatus(dn);
+        NodeOperationalState opState = nodeStatus.getOperationalState();
+        if (opState != NodeOperationalState.IN_SERVICE) {
+          numDecom--;
+          validDns.remove(dn);
+        }
+      } catch (NodeNotFoundException ex) {
+        numDecom--;
+        validDns.remove(dn);
+      }
+    }
+
+    for (DatanodeDetails dn : validDns) {
+      Set<ContainerID> containers;
+      try {
+        containers = nodeManager.getContainers(dn);
+      } catch (NodeNotFoundException ex) {
+        LOG.warn("The host {} was not found in SCM. Ignoring the request to " +
+            "decommission it", dn.getHostName());
+        errors.add(new DatanodeAdminError(dn.getHostName(),
+            "The host was not found in SCM"));

Review Comment:
   I don't think we need to add this exception to the list of errors here, 
since it'll get added later at line 340. Skipping it here avoids duplicate errors.



##########
hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeDecommissionManager.java:
##########
@@ -369,10 +410,208 @@ public void testNodeDecommissionManagerOnBecomeLeader() 
throws Exception {
     assertEquals(decom.getMonitor().getTrackedNodes().size(), 3);
   }
 
-  private SCMNodeManager createNodeManager(OzoneConfiguration config)
-      throws IOException, AuthenticationException {
-    scm = HddsTestUtils.getScm(config);
-    return (SCMNodeManager) scm.getScmNodeManager();
+  @Test
+  public void testInsufficientNodeDecommissionThrowsExceptionForRatis() throws

Review Comment:
   The newly added test cases look good. Can we also assert that the DNs have 
transitioned to the decommissioning state wherever relevant? Existing tests are 
doing that.



##########
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java:
##########
@@ -368,6 +388,63 @@ public synchronized void startDecommission(DatanodeDetails 
dn)
     }
   }
 
+  private synchronized boolean 
checkIfDecommissionPossible(List<DatanodeDetails> dns, List<DatanodeAdminError> 
errors) {
+    int numDecom = dns.size();
+    List<DatanodeDetails> validDns = dns.stream().collect(Collectors.toList());
+    Collections.copy(validDns, dns);
+    int inServiceTotal = 
nodeManager.getNodeCount(NodeStatus.inServiceHealthy());
+    for (DatanodeDetails dn : dns) {
+      try {
+        NodeStatus nodeStatus = getNodeStatus(dn);
+        NodeOperationalState opState = nodeStatus.getOperationalState();
+        if (opState != NodeOperationalState.IN_SERVICE) {
+          numDecom--;
+          validDns.remove(dn);
+        }
+      } catch (NodeNotFoundException ex) {
+        numDecom--;
+        validDns.remove(dn);
+      }
+    }
+
+    for (DatanodeDetails dn : validDns) {
+      Set<ContainerID> containers;
+      try {
+        containers = nodeManager.getContainers(dn);
+      } catch (NodeNotFoundException ex) {
+        LOG.warn("The host {} was not found in SCM. Ignoring the request to " +
+            "decommission it", dn.getHostName());
+        errors.add(new DatanodeAdminError(dn.getHostName(),
+            "The host was not found in SCM"));
+        continue; // ignore the DN and continue to next one
+      }
+
+      for (ContainerID cid : containers) {
+        ContainerInfo cif;
+        try {
+          cif = containerManager.getContainer(cid);
+        } catch (ContainerNotFoundException ex) {

Review Comment:
   We should log this exception.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to