ArafatKhan2198 commented on code in PR #6360:
URL: https://github.com/apache/ozone/pull/6360#discussion_r1571965179
##########
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java:
##########
@@ -1642,4 +1642,34 @@ private ReentrantReadWriteLock.WriteLock writeLock() {
private ReentrantReadWriteLock.ReadLock readLock() {
return lock.readLock();
}
+
+ /**
+ * This API allows removal of only DECOMMISSIONED and DEAD nodes from
NodeManager data structures and cleanup memory.
+ * This API call is having a pre-condition before removal of node like
following resources to be removed:
+ * --- all pipelines for datanode should be closed.
+ * --- all containers for datanode should be closed.
+ * --- remove all containers replicas maintained by datanode.
+ * --- clears all SCM DeletedBlockLog transaction records associated with
datanode.
+ *
+ * @param datanodeDetails
+ * @throws NodeNotFoundException
+ */
+ @Override
+ public void removeNode(DatanodeDetails datanodeDetails) throws
NodeNotFoundException, IOException {
+ writeLock().lock();
+ try {
+ NodeStatus nodeStatus = this.getNodeStatus(datanodeDetails);
+ if (datanodeDetails.isDecommissioned() || nodeStatus.isDead()) {
+ if (clusterMap.contains(datanodeDetails)) {
+ clusterMap.remove(datanodeDetails);
+ }
+ nodeStateManager.removeNode(datanodeDetails);
+ removeFromDnsToUuidMap(datanodeDetails.getUuid(),
datanodeDetails.getIpAddress());
+ final List<SCMCommand> cmdList =
getCommandQueue(datanodeDetails.getUuid());
+ LOG.info("Clearing command queue of size {} for DN {}",
cmdList.size(), datanodeDetails);
+ }
Review Comment:
Can we add an else statement here stating the reason for not removing the
node?
```
} else {
LOG.warn("Node not decommissioned or dead, cannot remove: {}",
datanodeDetails);
}
```
##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/NodeEndpoint.java:
##########
@@ -171,4 +184,144 @@ private DatanodeStorageReport
getStorageReport(DatanodeDetails datanode) {
long committed = nodeStat.getCommitted().get();
return new DatanodeStorageReport(capacity, used, remaining, committed);
}
+
+ /**
+ * Removes datanodes from Recon's memory and nodes table in Recon DB.
+ * @param uuids the list of datanode uuid's
+ *
+ * @return JSON response with failed, not found and successfully removed
datanodes list.
+ */
+ @PUT
+ @Path("/remove")
+ @Consumes(MediaType.APPLICATION_JSON)
+ public Response removeDatanodes(List<String> uuids) {
+ List<DatanodeMetadata> failedDatanodes = new ArrayList<>();
+ List<DatanodeMetadata> notFoundDatanodes = new ArrayList<>();
+ List<DatanodeMetadata> removedDatanodes = new ArrayList<>();
+ Map<String, String> failedNodeErrorResponseMap = new HashMap<>();
+
+ Preconditions.checkNotNull(uuids, "Datanode list argument should not be
null");
+ Preconditions.checkArgument(!uuids.isEmpty(), "Datanode list argument
should not be empty");
+ try {
+ for (String uuid : uuids) {
+ DatanodeDetails nodeByUuid = nodeManager.getNodeByUuid(uuid);
+ try {
+ if (preChecksSuccess(nodeByUuid, failedNodeErrorResponseMap)) {
+ removedDatanodes.add(DatanodeMetadata.newBuilder()
+ .withHostname(nodeManager.getHostName(nodeByUuid))
+ .withUUid(uuid)
+ .withState(nodeManager.getNodeStatus(nodeByUuid).getHealth())
+ .build());
+ nodeManager.removeNode(nodeByUuid);
+ } else {
+ failedDatanodes.add(DatanodeMetadata.newBuilder()
+ .withHostname(nodeManager.getHostName(nodeByUuid))
+ .withUUid(uuid)
+ .withOperationalState(nodeByUuid.getPersistedOpState())
+ .withState(nodeManager.getNodeStatus(nodeByUuid).getHealth())
+ .build());
+ }
+ } catch (NodeNotFoundException nnfe) {
+ LOG.error("Selected node {} not found : {} ", uuid, nnfe);
+ notFoundDatanodes.add(DatanodeMetadata.newBuilder()
+ .withHostname("")
+ .withState(NodeState.DEAD)
+ .withUUid(uuid).build());
+ }
+ }
+ } catch (Exception exp) {
+ LOG.error("Unexpected Error while removing datanodes : {} ", exp);
+ throw new WebApplicationException(exp,
Response.Status.INTERNAL_SERVER_ERROR);
+ }
+
+ RemoveDataNodesResponseWrapper removeDataNodesResponseWrapper = new
RemoveDataNodesResponseWrapper();
+
+ if (!failedDatanodes.isEmpty()) {
+ DatanodesResponse failedNodesResp =
+ new DatanodesResponse(failedDatanodes.size(),
Collections.emptyList());
+
failedNodesResp.setFailedNodeErrorResponseMap(failedNodeErrorResponseMap);
+
removeDataNodesResponseWrapper.getDatanodesResponseMap().put("failedDatanodes",
failedNodesResp);
+ }
+
+ if (!notFoundDatanodes.isEmpty()) {
+ DatanodesResponse notFoundNodesResp =
+ new DatanodesResponse(notFoundDatanodes.size(), notFoundDatanodes);
+
removeDataNodesResponseWrapper.getDatanodesResponseMap().put("notFoundDatanodes",
notFoundNodesResp);
+ }
+
+ if (!removedDatanodes.isEmpty()) {
+ DatanodesResponse removedNodesResp =
+ new DatanodesResponse(removedDatanodes.size(), removedDatanodes);
+
removeDataNodesResponseWrapper.getDatanodesResponseMap().put("removedDatanodes",
removedNodesResp);
+ }
+ return Response.ok(removeDataNodesResponseWrapper).build();
+ }
+
+ private boolean preChecksSuccess(DatanodeDetails nodeByUuid, Map<String,
String> failedNodeErrorResponseMap)
+ throws NodeNotFoundException {
+ if (null == nodeByUuid) {
+ throw new NodeNotFoundException("Node not found !!!");
+ }
+ NodeStatus nodeStatus = null;
+ AtomicBoolean isContainerOrPipeLineOpen = new AtomicBoolean(false);
Review Comment:
Can we replace AtomicBoolean with a simple boolean if threading isn't a
concern?
##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/NodeEndpoint.java:
##########
@@ -171,4 +184,144 @@ private DatanodeStorageReport
getStorageReport(DatanodeDetails datanode) {
long committed = nodeStat.getCommitted().get();
return new DatanodeStorageReport(capacity, used, remaining, committed);
}
+
+ /**
+ * Removes datanodes from Recon's memory and nodes table in Recon DB.
+ * @param uuids the list of datanode uuid's
+ *
+ * @return JSON response with failed, not found and successfully removed
datanodes list.
+ */
+ @PUT
+ @Path("/remove")
+ @Consumes(MediaType.APPLICATION_JSON)
+ public Response removeDatanodes(List<String> uuids) {
+ List<DatanodeMetadata> failedDatanodes = new ArrayList<>();
+ List<DatanodeMetadata> notFoundDatanodes = new ArrayList<>();
+ List<DatanodeMetadata> removedDatanodes = new ArrayList<>();
+ Map<String, String> failedNodeErrorResponseMap = new HashMap<>();
+
+ Preconditions.checkNotNull(uuids, "Datanode list argument should not be
null");
+ Preconditions.checkArgument(!uuids.isEmpty(), "Datanode list argument
should not be empty");
+ try {
+ for (String uuid : uuids) {
+ DatanodeDetails nodeByUuid = nodeManager.getNodeByUuid(uuid);
+ try {
+ if (preChecksSuccess(nodeByUuid, failedNodeErrorResponseMap)) {
+ removedDatanodes.add(DatanodeMetadata.newBuilder()
+ .withHostname(nodeManager.getHostName(nodeByUuid))
+ .withUUid(uuid)
+ .withState(nodeManager.getNodeStatus(nodeByUuid).getHealth())
+ .build());
+ nodeManager.removeNode(nodeByUuid);
Review Comment:
A log statement could be added right after the operation that conclusively
removes a node from the system.
```
// Log successful removal
LOG.info("Successfully removed node: {}", nodeByUuid.getUuidString());
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]