This is an automated email from the ASF dual-hosted git repository.
adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 2f9f771e35 HDDS-9511. Handle DataNode decommissioning error for
hoststring and port. (#5469)
2f9f771e35 is described below
commit 2f9f771e35d72240bf72bd1bcc99bc6cb046e613
Author: ashishkumar50 <[email protected]>
AuthorDate: Mon Oct 23 20:00:36 2023 +0530
HDDS-9511. Handle DataNode decommissioning error for hoststring and port.
(#5469)
---
.../hdds/scm/node/NodeDecommissionManager.java | 65 +++++++++++++-------
.../hdds/scm/node/TestNodeDecommissionManager.java | 71 ++++++++++++----------
2 files changed, 83 insertions(+), 53 deletions(-)
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java
index 3e0ac9372d..f48841bc67 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java
@@ -109,17 +109,26 @@ public class NodeDecommissionManager {
}
}
- private List<DatanodeDetails> mapHostnamesToDatanodes(List<String> hosts)
- throws InvalidHostStringException {
+ private List<DatanodeDetails> mapHostnamesToDatanodes(List<String> hosts,
+ List<DatanodeAdminError> errors) {
List<DatanodeDetails> results = new LinkedList<>();
+ HostDefinition host;
+ InetAddress addr;
+ String msg;
for (String hostString : hosts) {
- HostDefinition host = new HostDefinition(hostString);
- InetAddress addr;
try {
+ host = new HostDefinition(hostString);
addr = InetAddress.getByName(host.getHostname());
+ } catch (InvalidHostStringException e) {
+ LOG.warn("Unable to resolve host {} ", hostString, e);
+ errors.add(new DatanodeAdminError(hostString,
+ e.getMessage()));
+ continue;
} catch (UnknownHostException e) {
- throw new InvalidHostStringException("Unable to resolve host "
- + host.getRawHostname(), e);
+ LOG.warn("Unable to resolve host {} ", hostString, e);
+ errors.add(new DatanodeAdminError(hostString,
+ e.getMessage()));
+ continue;
}
String dnsName;
if (useHostnames) {
@@ -129,16 +138,21 @@ public class NodeDecommissionManager {
}
List<DatanodeDetails> found = nodeManager.getNodesByAddress(dnsName);
if (found.isEmpty()) {
- throw new InvalidHostStringException("Host " + host.getRawHostname()
+ msg = "Host " + host.getRawHostname()
+ " (" + dnsName + ") is not running any datanodes registered"
- + " with SCM. Please check the host name.");
+ + " with SCM. Please check the host name.";
+ LOG.warn(msg);
+ errors.add(new DatanodeAdminError(host.getRawHostname(), msg));
} else if (found.size() == 1) {
if (host.getPort() != -1 &&
!validateDNPortMatch(host.getPort(), found.get(0))) {
- throw new InvalidHostStringException("Host " + host.getRawHostname()
+ msg = "Host " + host.getRawHostname()
+ " is running a datanode registered with SCM,"
+ " but the port number doesn't match."
- + " Please check the port number.");
+ + " Please check the port number.";
+ LOG.warn(msg);
+ errors.add(new DatanodeAdminError(host.getRawHostname(), msg));
+ continue;
}
results.add(found.get(0));
} else {
@@ -149,13 +163,17 @@ public class NodeDecommissionManager {
// should be the same, and we should just use the one with the most
// recent heartbeat.
if (host.getPort() != -1) {
- found.removeIf(dn -> !validateDNPortMatch(host.getPort(), dn));
+ HostDefinition finalHost = host;
+ found.removeIf(dn -> !validateDNPortMatch(finalHost.getPort(), dn));
}
if (found.isEmpty()) {
- throw new InvalidHostStringException("Host " + host.getRawHostname()
+ msg = "Host " + host.getRawHostname()
+ " is running multiple datanodes registered with SCM,"
+ " but no port numbers match."
- + " Please check the port number.");
+ + " Please check the port number.";
+ LOG.warn(msg);
+ errors.add(new DatanodeAdminError(host.getRawHostname(), msg));
+ continue;
} else if (found.size() == 1) {
results.add(found.get(0));
continue;
@@ -168,19 +186,24 @@ public class NodeDecommissionManager {
// not possible for a host to have 2 DNs coming from the same port.
DatanodeDetails mostRecent = findDnWithMostRecentHeartbeat(found);
if (mostRecent == null) {
- throw new InvalidHostStringException("Host " +
host.getRawHostname()
+ msg = "Host " + host.getRawHostname()
+ " has multiple datanodes registered with SCM."
+ " All have identical ports, but none have a newest"
- + " heartbeat.");
+ + " heartbeat.";
+ LOG.warn(msg);
+ errors.add(new DatanodeAdminError(host.getRawHostname(), msg));
+ continue;
}
results.add(mostRecent);
} else {
// We have no passed in port or the ports in SCM do not all match, so
// we cannot decide which DN to use.
- throw new InvalidHostStringException("Host " + host.getRawHostname()
+ msg = "Host " + host.getRawHostname()
+ " is running multiple datanodes registered with SCM,"
+ " but no port numbers match."
- + " Please check the port number.");
+ + " Please check the port number.";
+ LOG.warn(msg);
+ errors.add(new DatanodeAdminError(host.getRawHostname(), msg));
}
}
}
@@ -291,9 +314,9 @@ public class NodeDecommissionManager {
}
public synchronized List<DatanodeAdminError> decommissionNodes(
- List<String> nodes) throws InvalidHostStringException {
- List<DatanodeDetails> dns = mapHostnamesToDatanodes(nodes);
+ List<String> nodes) {
List<DatanodeAdminError> errors = new ArrayList<>();
+ List<DatanodeDetails> dns = mapHostnamesToDatanodes(nodes, errors);
for (DatanodeDetails dn : dns) {
try {
startDecommission(dn);
@@ -357,8 +380,8 @@ public class NodeDecommissionManager {
public synchronized List<DatanodeAdminError> recommissionNodes(
List<String> nodes) throws InvalidHostStringException {
- List<DatanodeDetails> dns = mapHostnamesToDatanodes(nodes);
List<DatanodeAdminError> errors = new ArrayList<>();
+ List<DatanodeDetails> dns = mapHostnamesToDatanodes(nodes, errors);
for (DatanodeDetails dn : dns) {
try {
recommission(dn);
@@ -394,8 +417,8 @@ public class NodeDecommissionManager {
public synchronized List<DatanodeAdminError> startMaintenanceNodes(
List<String> nodes, int endInHours) throws InvalidHostStringException {
- List<DatanodeDetails> dns = mapHostnamesToDatanodes(nodes);
List<DatanodeAdminError> errors = new ArrayList<>();
+ List<DatanodeDetails> dns = mapHostnamesToDatanodes(nodes, errors);
for (DatanodeDetails dn : dns) {
try {
startMaintenance(dn, endInHours);
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeDecommissionManager.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeDecommissionManager.java
index c0e3b804bf..55b9547f5a 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeDecommissionManager.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeDecommissionManager.java
@@ -29,6 +29,7 @@ import
org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
import org.apache.hadoop.hdds.server.events.EventQueue;
import
org.apache.hadoop.security.authentication.client.AuthenticationException;
+import org.junit.Assert;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
@@ -94,48 +95,53 @@ public class TestNodeDecommissionManager {
}
@Test
- public void testAnyInvalidHostThrowsException()
- throws InvalidHostStringException {
+ public void testAnyInvalidHostThrowsException() {
List<DatanodeDetails> dns = generateDatanodes();
// Try to decommission a host that does exist, but give incorrect port
- try {
- decom.decommissionNodes(Arrays.asList(dns.get(1).getIpAddress() +
":10"));
- fail("InvalidHostStringException expected");
- } catch (InvalidHostStringException e) {
- }
+ List<DatanodeAdminError> error =
+ decom.decommissionNodes(
+ Arrays.asList(dns.get(1).getIpAddress() + ":10"));
+ Assert.assertTrue(error.size() == 1);
+ Assert.assertTrue(
+ error.get(0).getHostname().contains(dns.get(1).getIpAddress()));
// Try to decommission a host that does not exist
- try {
- decom.decommissionNodes(Arrays.asList("123.123.123.123"));
- fail("InvalidHostStringException expected");
- } catch (InvalidHostStringException e) {
- }
+ error = decom.decommissionNodes(Arrays.asList("123.123.123.123"));
+ Assert.assertTrue(error.size() == 1);
+ Assert.assertTrue(
+ error.get(0).getHostname().contains("123.123.123.123"));
// Try to decommission a host that does exist and a host that does not
- try {
- decom.decommissionNodes(Arrays.asList(
+ error = decom.decommissionNodes(Arrays.asList(
dns.get(1).getIpAddress(), "123,123,123,123"));
- fail("InvalidHostStringException expected");
- } catch (InvalidHostStringException e) {
- }
+ Assert.assertTrue(error.size() == 1);
+ Assert.assertTrue(
+ error.get(0).getHostname().contains("123,123,123,123"));
// Try to decommission a host with many DNs on the address with no port
- try {
- decom.decommissionNodes(Arrays.asList(
+ error = decom.decommissionNodes(Arrays.asList(
dns.get(0).getIpAddress()));
- fail("InvalidHostStringException expected");
- } catch (InvalidHostStringException e) {
- }
+ Assert.assertTrue(error.size() == 1);
+ Assert.assertTrue(
+ error.get(0).getHostname().contains(dns.get(0).getIpAddress()));
// Try to decommission a host with many DNs on the address with a port
// that does not exist
- try {
- decom.decommissionNodes(Arrays.asList(
+ error = decom.decommissionNodes(Arrays.asList(
dns.get(0).getIpAddress() + ":10"));
- fail("InvalidHostStringException expected");
- } catch (InvalidHostStringException e) {
- }
+ Assert.assertTrue(error.size() == 1);
+ Assert.assertTrue(
+ error.get(0).getHostname().contains(dns.get(0).getIpAddress() +
":10"));
+
+ // Try to decommission 2 hosts with address that does not exist
+ // Both should return error
+ error = decom.decommissionNodes(Arrays.asList(
+ "123.123.123.123", "234.234.234.234"));
+ Assert.assertTrue(error.size() == 2);
+ Assert.assertTrue(
+ error.get(0).getHostname().contains("123.123.123.123") &&
+ error.get(1).getHostname().contains("234.234.234.234"));
}
@Test
@@ -219,11 +225,12 @@ public class TestNodeDecommissionManager {
nodeManager.register(extraDN, null, null);
// Attempt to decommission with just the IP, which should fail.
- try {
- decom.decommissionNodes(Arrays.asList(extraDN.getIpAddress()));
- fail("InvalidHostStringException expected");
- } catch (InvalidHostStringException e) {
- }
+ List<DatanodeAdminError> error =
+ decom.decommissionNodes(Arrays.asList(extraDN.getIpAddress()));
+ Assert.assertTrue(error.size() == 1);
+ Assert.assertTrue(
+ error.get(0).getHostname().contains(extraDN.getIpAddress()));
+
// Now try the one with the unique port
decom.decommissionNodes(Arrays.asList(
extraDN.getIpAddress() + ":" + ratisPort + 1));
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]