This is an automated email from the ASF dual-hosted git repository.

adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 2f9f771e35 HDDS-9511. Handle DataNode decommissioning error for 
hoststring and port. (#5469)
2f9f771e35 is described below

commit 2f9f771e35d72240bf72bd1bcc99bc6cb046e613
Author: ashishkumar50 <[email protected]>
AuthorDate: Mon Oct 23 20:00:36 2023 +0530

    HDDS-9511. Handle DataNode decommissioning error for hoststring and port. 
(#5469)
---
 .../hdds/scm/node/NodeDecommissionManager.java     | 65 +++++++++++++-------
 .../hdds/scm/node/TestNodeDecommissionManager.java | 71 ++++++++++++----------
 2 files changed, 83 insertions(+), 53 deletions(-)

diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java
 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java
index 3e0ac9372d..f48841bc67 100644
--- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java
+++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java
@@ -109,17 +109,26 @@ public class NodeDecommissionManager {
     }
   }
 
-  private List<DatanodeDetails> mapHostnamesToDatanodes(List<String> hosts)
-      throws InvalidHostStringException {
+  private List<DatanodeDetails> mapHostnamesToDatanodes(List<String> hosts,
+      List<DatanodeAdminError> errors) {
     List<DatanodeDetails> results = new LinkedList<>();
+    HostDefinition host;
+    InetAddress addr;
+    String msg;
     for (String hostString : hosts) {
-      HostDefinition host = new HostDefinition(hostString);
-      InetAddress addr;
       try {
+        host = new HostDefinition(hostString);
         addr = InetAddress.getByName(host.getHostname());
+      } catch (InvalidHostStringException e) {
+        LOG.warn("Unable to resolve host {} ", hostString, e);
+        errors.add(new DatanodeAdminError(hostString,
+            e.getMessage()));
+        continue;
       } catch (UnknownHostException e) {
-        throw new InvalidHostStringException("Unable to resolve host "
-            + host.getRawHostname(), e);
+        LOG.warn("Unable to resolve host {} ", hostString, e);
+        errors.add(new DatanodeAdminError(hostString,
+            e.getMessage()));
+        continue;
       }
       String dnsName;
       if (useHostnames) {
@@ -129,16 +138,21 @@ public class NodeDecommissionManager {
       }
       List<DatanodeDetails> found = nodeManager.getNodesByAddress(dnsName);
       if (found.isEmpty()) {
-        throw new InvalidHostStringException("Host " + host.getRawHostname()
+        msg = "Host " + host.getRawHostname()
             + " (" + dnsName + ") is not running any datanodes registered"
-            + " with SCM. Please check the host name.");
+            + " with SCM. Please check the host name.";
+        LOG.warn(msg);
+        errors.add(new DatanodeAdminError(host.getRawHostname(), msg));
       } else if (found.size() == 1) {
         if (host.getPort() != -1 &&
             !validateDNPortMatch(host.getPort(), found.get(0))) {
-          throw new InvalidHostStringException("Host " + host.getRawHostname()
+          msg = "Host " + host.getRawHostname()
               + " is running a datanode registered with SCM,"
               + " but the port number doesn't match."
-              + " Please check the port number.");
+              + " Please check the port number.";
+          LOG.warn(msg);
+          errors.add(new DatanodeAdminError(host.getRawHostname(), msg));
+          continue;
         }
         results.add(found.get(0));
       } else {
@@ -149,13 +163,17 @@ public class NodeDecommissionManager {
         // should be the same, and we should just use the one with the most
         // recent heartbeat.
         if (host.getPort() != -1) {
-          found.removeIf(dn -> !validateDNPortMatch(host.getPort(), dn));
+          HostDefinition finalHost = host;
+          found.removeIf(dn -> !validateDNPortMatch(finalHost.getPort(), dn));
         }
         if (found.isEmpty()) {
-          throw new InvalidHostStringException("Host " + host.getRawHostname()
+          msg = "Host " + host.getRawHostname()
               + " is running multiple datanodes registered with SCM,"
               + " but no port numbers match."
-              + " Please check the port number.");
+              + " Please check the port number.";
+          LOG.warn(msg);
+          errors.add(new DatanodeAdminError(host.getRawHostname(), msg));
+          continue;
         } else if (found.size() == 1) {
           results.add(found.get(0));
           continue;
@@ -168,19 +186,24 @@ public class NodeDecommissionManager {
           // not possible for a host to have 2 DNs coming from the same port.
           DatanodeDetails mostRecent = findDnWithMostRecentHeartbeat(found);
           if (mostRecent == null) {
-            throw new InvalidHostStringException("Host " + 
host.getRawHostname()
+            msg = "Host " + host.getRawHostname()
                 + " has multiple datanodes registered with SCM."
                 + " All have identical ports, but none have a newest"
-                + " heartbeat.");
+                + " heartbeat.";
+            LOG.warn(msg);
+            errors.add(new DatanodeAdminError(host.getRawHostname(), msg));
+            continue;
           }
           results.add(mostRecent);
         } else {
           // We have no passed in port or the ports in SCM do not all match, so
           // we cannot decide which DN to use.
-          throw new InvalidHostStringException("Host " + host.getRawHostname()
+          msg = "Host " + host.getRawHostname()
               + " is running multiple datanodes registered with SCM,"
               + " but no port numbers match."
-              + " Please check the port number.");
+              + " Please check the port number.";
+          LOG.warn(msg);
+          errors.add(new DatanodeAdminError(host.getRawHostname(), msg));
         }
       }
     }
@@ -291,9 +314,9 @@ public class NodeDecommissionManager {
   }
 
   public synchronized List<DatanodeAdminError> decommissionNodes(
-      List<String> nodes) throws InvalidHostStringException {
-    List<DatanodeDetails> dns = mapHostnamesToDatanodes(nodes);
+      List<String> nodes) {
     List<DatanodeAdminError> errors = new ArrayList<>();
+    List<DatanodeDetails> dns = mapHostnamesToDatanodes(nodes, errors);
     for (DatanodeDetails dn : dns) {
       try {
         startDecommission(dn);
@@ -357,8 +380,8 @@ public class NodeDecommissionManager {
 
   public synchronized List<DatanodeAdminError> recommissionNodes(
       List<String> nodes) throws InvalidHostStringException {
-    List<DatanodeDetails> dns = mapHostnamesToDatanodes(nodes);
     List<DatanodeAdminError> errors = new ArrayList<>();
+    List<DatanodeDetails> dns = mapHostnamesToDatanodes(nodes, errors);
     for (DatanodeDetails dn : dns) {
       try {
         recommission(dn);
@@ -394,8 +417,8 @@ public class NodeDecommissionManager {
 
   public synchronized List<DatanodeAdminError> startMaintenanceNodes(
       List<String> nodes, int endInHours) throws InvalidHostStringException {
-    List<DatanodeDetails> dns = mapHostnamesToDatanodes(nodes);
     List<DatanodeAdminError> errors = new ArrayList<>();
+    List<DatanodeDetails> dns = mapHostnamesToDatanodes(nodes, errors);
     for (DatanodeDetails dn : dns) {
       try {
         startMaintenance(dn, endInHours);
diff --git 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeDecommissionManager.java
 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeDecommissionManager.java
index c0e3b804bf..55b9547f5a 100644
--- 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeDecommissionManager.java
+++ 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeDecommissionManager.java
@@ -29,6 +29,7 @@ import 
org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
 import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
 import org.apache.hadoop.hdds.server.events.EventQueue;
 import 
org.apache.hadoop.security.authentication.client.AuthenticationException;
+import org.junit.Assert;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.io.TempDir;
@@ -94,48 +95,53 @@ public class TestNodeDecommissionManager {
   }
 
   @Test
-  public void testAnyInvalidHostThrowsException()
-      throws InvalidHostStringException {
+  public void testAnyInvalidHostThrowsException() {
     List<DatanodeDetails> dns = generateDatanodes();
 
     // Try to decommission a host that does exist, but give incorrect port
-    try {
-      decom.decommissionNodes(Arrays.asList(dns.get(1).getIpAddress() + 
":10"));
-      fail("InvalidHostStringException expected");
-    } catch (InvalidHostStringException e) {
-    }
+    List<DatanodeAdminError> error =
+        decom.decommissionNodes(
+            Arrays.asList(dns.get(1).getIpAddress() + ":10"));
+    Assert.assertTrue(error.size() == 1);
+    Assert.assertTrue(
+        error.get(0).getHostname().contains(dns.get(1).getIpAddress()));
 
     // Try to decommission a host that does not exist
-    try {
-      decom.decommissionNodes(Arrays.asList("123.123.123.123"));
-      fail("InvalidHostStringException expected");
-    } catch (InvalidHostStringException e) {
-    }
+    error = decom.decommissionNodes(Arrays.asList("123.123.123.123"));
+    Assert.assertTrue(error.size() == 1);
+    Assert.assertTrue(
+        error.get(0).getHostname().contains("123.123.123.123"));
 
     // Try to decommission a host that does exist and a host that does not
-    try {
-      decom.decommissionNodes(Arrays.asList(
+    error  = decom.decommissionNodes(Arrays.asList(
           dns.get(1).getIpAddress(), "123,123,123,123"));
-      fail("InvalidHostStringException expected");
-    } catch (InvalidHostStringException e) {
-    }
+    Assert.assertTrue(error.size() == 1);
+    Assert.assertTrue(
+        error.get(0).getHostname().contains("123,123,123,123"));
 
     // Try to decommission a host with many DNs on the address with no port
-    try {
-      decom.decommissionNodes(Arrays.asList(
+    error = decom.decommissionNodes(Arrays.asList(
           dns.get(0).getIpAddress()));
-      fail("InvalidHostStringException expected");
-    } catch (InvalidHostStringException e) {
-    }
+    Assert.assertTrue(error.size() == 1);
+    Assert.assertTrue(
+        error.get(0).getHostname().contains(dns.get(0).getIpAddress()));
 
     // Try to decommission a host with many DNs on the address with a port
     // that does not exist
-    try {
-      decom.decommissionNodes(Arrays.asList(
+    error = decom.decommissionNodes(Arrays.asList(
           dns.get(0).getIpAddress() + ":10"));
-      fail("InvalidHostStringException expected");
-    } catch (InvalidHostStringException e) {
-    }
+    Assert.assertTrue(error.size() == 1);
+    Assert.assertTrue(
+        error.get(0).getHostname().contains(dns.get(0).getIpAddress() + 
":10"));
+
+    // Try to decommission 2 hosts with address that does not exist
+    // Both should return error
+    error  = decom.decommissionNodes(Arrays.asList(
+        "123.123.123.123", "234.234.234.234"));
+    Assert.assertTrue(error.size() == 2);
+    Assert.assertTrue(
+        error.get(0).getHostname().contains("123.123.123.123") &&
+            error.get(1).getHostname().contains("234.234.234.234"));
   }
 
   @Test
@@ -219,11 +225,12 @@ public class TestNodeDecommissionManager {
     nodeManager.register(extraDN, null, null);
 
     // Attempt to decommission with just the IP, which should fail.
-    try {
-      decom.decommissionNodes(Arrays.asList(extraDN.getIpAddress()));
-      fail("InvalidHostStringException expected");
-    } catch (InvalidHostStringException e) {
-    }
+    List<DatanodeAdminError> error =
+        decom.decommissionNodes(Arrays.asList(extraDN.getIpAddress()));
+    Assert.assertTrue(error.size() == 1);
+    Assert.assertTrue(
+        error.get(0).getHostname().contains(extraDN.getIpAddress()));
+
     // Now try the one with the unique port
     decom.decommissionNodes(Arrays.asList(
         extraDN.getIpAddress() + ":" + ratisPort + 1));


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to