[ https://issues.apache.org/jira/browse/HDFS-17218?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17773573#comment-17773573 ]
Haiyang Hu commented on HDFS-17218:
-----------------------------------

This unit test may reproduce the case.

{code:java}
@Test(timeout = 120000)
public void testRemoveExcessRedundancyMapWhenDNRegisters()
    throws IOException, InterruptedException, TimeoutException {
  Configuration config = new HdfsConfiguration();
  // Bump up the redundancy recheck interval so the monitor does not interfere.
  config.setInt(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, 10000);
  DataNodeFaultInjector oldInjector = DataNodeFaultInjector.get();
  final Semaphore semaphore = new Semaphore(0);
  try (MiniDFSCluster cluster =
      new MiniDFSCluster.Builder(config).numDataNodes(3).build()) {
    DistributedFileSystem fs = cluster.getFileSystem();
    BlockManager blockManager =
        cluster.getNameNode().getNamesystem().getBlockManager();
    cluster.waitActive();
    final DataNodeFaultInjector injector = new DataNodeFaultInjector() {
      @Override
      public void delayDeleteReplica() {
        // Hold off the replica deletion until the test releases the semaphore.
        try {
          semaphore.acquire(1);
        } catch (InterruptedException e) {
          // Ignore.
        }
      }
    };
    DataNodeFaultInjector.set(injector);

    // Create a file with replication 3.
    Path path = new Path("/testfile");
    DFSTestUtil.createFile(fs, path, 1024, (short) 3, 0);
    DFSTestUtil.waitReplication(fs, path, (short) 3);
    LocatedBlock lb = DFSTestUtil.getAllBlocks(fs, path).get(0);
    ExtendedBlock extendedBlock = lb.getBlock();
    DatanodeInfo[] loc = lb.getLocations();
    assertEquals(3, loc.length);

    // Set replication to 2 so that one replica becomes excess.
    fs.setReplication(path, (short) 2);

    // Both excessRedundancyMap and invalidateBlocks should now have size 1.
    assertEquals(1, blockManager.getExcessBlocksCount());
    assertEquals(1, blockManager.getPendingDeletionBlocksCount());

    DataNode excessDn = Arrays.stream(loc)
        .filter(datanodeInfo -> blockManager.getExcessSize4Testing(
            datanodeInfo.getDatanodeUuid()) > 0)
        .map(datanodeInfo -> cluster.getDataNode(datanodeInfo.getIpcPort()))
        .findFirst()
        .orElse(null);

    // Schedule the block for deletion at excessDn.
    assertEquals(1, blockManager.computeInvalidateWork(1));
    // excessRedundancyMap still has size 1.
    assertEquals(1, blockManager.getExcessBlocksCount());
    // invalidateBlocks is now empty.
    assertEquals(0, blockManager.getPendingDeletionBlocksCount());

    assertNotNull(excessDn);
    // The NameNode asks the DataNode to delete the replica in the heartbeat
    // response.
    cluster.triggerHeartbeats();

    // Wait for the datanode to queue the block deletion asynchronously.
    DataNode finalExcessDn = excessDn;
    GenericTestUtils.waitFor(() -> cluster.getFsDatasetTestUtils(finalExcessDn)
        .getPendingAsyncDeletions() == 1, 100, 1000);

    // Restart the datanode; the queued deletion is lost.
    int ipcPort = excessDn.getDatanodeId().getIpcPort();
    MiniDFSCluster.DataNodeProperties dataNodeProperties = cluster.stopDataNode(
        excessDn.getDatanodeId().getXferAddr());
    assertTrue(cluster.restartDataNode(dataNodeProperties, true));
    cluster.waitActive();
    semaphore.release(1);
    cluster.triggerHeartbeats();
    cluster.triggerBlockReports();

    // excessRedundancyMap still has size 1 and still includes excessDn.
    assertEquals(1, blockManager.getExcessSize4Testing(excessDn.getDatanodeUuid()));
    assertEquals(1, blockManager.getExcessBlocksCount());
    // invalidateBlocks remains empty ...
    assertEquals(0, blockManager.getPendingDeletionBlocksCount());
    // ... so no further deletion work can be scheduled: the excess replica is
    // leaked.
    assertEquals(0, blockManager.computeInvalidateWork(1));

    // The replica still exists on excessDn.
    excessDn = cluster.getDataNode(ipcPort);
    assertNotNull(cluster.getFsDatasetTestUtils(excessDn).fetchReplica(extendedBlock));
    // No pending async deletions on excessDn.
    assertEquals(0, cluster.getFsDatasetTestUtils(excessDn).getPendingAsyncDeletions());
    // The block still reports 3 locations instead of the expected 2.
    assertEquals(3, DFSTestUtil.getAllBlocks(fs, path).get(0).getLocations().length);
  } finally {
    DataNodeFaultInjector.set(oldInjector);
  }
}
{code}

> NameNode should remove its excess blocks from the ExcessRedundancyMap when a
> DN registers
> -----------------------------------------------------------------------------------------
>
>                 Key: HDFS-17218
>                 URL: https://issues.apache.org/jira/browse/HDFS-17218
>             Project: Hadoop HDFS
>          Issue Type: Improvement
>          Components: namenode
>            Reporter: Haiyang Hu
>            Assignee: Haiyang Hu
>            Priority: Major
>
> We found that a DN loses all pending DNA_INVALIDATE blocks if it restarts.
> *Root cause*
> The DN deletes replicas asynchronously, so it can hold many pending deletion
> blocks in memory. When the DN restarts, these in-memory pending deletions are
> lost. As a result, the corresponding entries in the NameNode's excess map are
> leaked, which leaves many blocks with more replicas than expected.
> *Solution*
> The NameNode should remove a DN's excess blocks from the ExcessRedundancyMap
> when that DN registers. This ensures that when the DN's full block report is
> processed, processExtraRedundancy can run against the actual state of the
> blocks.
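As a rough illustration of the direction proposed above, here is a minimal sketch of a registration-time cleanup on the NameNode side. This is not the actual patch: the method name removeExcessRedundancyOnRegister and the getExcessBlocksOnDatanode() helper are assumptions made for illustration, not existing Hadoop APIs.

{code:java}
// Rough sketch only -- not the committed patch. Assumes this lives in
// BlockManager and is invoked from DatanodeManager#registerDatanode() while
// the namesystem write lock is held. getExcessBlocksOnDatanode() is a
// hypothetical helper; ExcessRedundancyMap does not expose such an iterator.
void removeExcessRedundancyOnRegister(DatanodeDescriptor node) {
  // A restarted DN has lost its queued DNA_INVALIDATE work, so any entries
  // still charged to it in the excess map can never be cleared by deletion
  // acknowledgements. Drop them here.
  for (BlockInfo block : getExcessBlocksOnDatanode(node)) {
    excessRedundancyMap.remove(node, block);
  }
  // The DN's first full block report after registration will then re-run
  // processExtraRedundancy() against the replicas it actually holds,
  // re-populating the excess map where deletion is still required.
}
{code}

Hooking the cleanup into registration, rather than into block-report processing, would mean the excess map is already consistent before the first full block report is handled, which matches the intent described above.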