KevinWikant commented on a change in pull request #3675:
URL: https://github.com/apache/hadoop/pull/3675#discussion_r762076676
##########
File path:
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
##########
@@ -1654,4 +1658,139 @@ public Boolean get() {
cleanupFile(fileSys, file);
}
+
+ /**
+ * Test DatanodeAdminManager logic to re-queue unhealthy decommissioning
nodes
+ * which are blocking the decommissioning of healthy nodes.
+ * Force the tracked nodes set to be filled with nodes lost while
decommissioning,
+ * then decommission healthy nodes & validate they are decommissioned
eventually.
+ */
+ @Test(timeout = 120000)
+ public void testRequeueUnhealthyDecommissioningNodes() throws Exception {
+ // Allow 3 datanodes to be decommissioned at a time
+
getConf().setInt(DFSConfigKeys.DFS_NAMENODE_DECOMMISSION_MAX_CONCURRENT_TRACKED_NODES,
3);
+ // Disable the normal monitor runs
+ getConf()
+ .setInt(MiniDFSCluster.DFS_NAMENODE_DECOMMISSION_INTERVAL_TESTING_KEY,
Integer.MAX_VALUE);
+
+ // Start cluster with 6 datanodes
+ startCluster(1, 6);
+ final FSNamesystem namesystem = getCluster().getNamesystem();
+ final BlockManager blockManager = namesystem.getBlockManager();
+ final DatanodeManager datanodeManager = blockManager.getDatanodeManager();
+ final DatanodeAdminManager decomManager =
datanodeManager.getDatanodeAdminManager();
+ assertEquals(6, getCluster().getDataNodes().size());
+
+ // 3 datanodes will be "live" datanodes that are expected to be
decommissioned eventually
+ final List<DatanodeDescriptor> liveNodes =
getCluster().getDataNodes().subList(3, 6).stream()
+ .map(dn -> getDatanodeDesriptor(namesystem, dn.getDatanodeUuid()))
+ .collect(Collectors.toList());
+ assertEquals(3, liveNodes.size());
+
+ // 3 datanodes will be "dead" datanodes that are expected to never be
decommissioned
+ final List<DatanodeDescriptor> deadNodes =
getCluster().getDataNodes().subList(0, 3).stream()
+ .map(dn -> getDatanodeDesriptor(namesystem, dn.getDatanodeUuid()))
+ .collect(Collectors.toList());
+ assertEquals(3, deadNodes.size());
+
+ // Need to create some data or "isNodeHealthyForDecommissionOrMaintenance"
+ // may unexpectedly return true for a dead node
+ writeFile(getCluster().getFileSystem(), new Path("/tmp/test1"), 1, 100);
Review comment:
This should use a larger replication factor here to ensure there are
LowRedundancy blocks.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]