[ https://issues.apache.org/jira/browse/HDFS-17218?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17773573#comment-17773573 ]

Haiyang Hu commented on HDFS-17218:
-----------------------------------

The following unit test can reproduce this case.
{code:java}
@Test(timeout = 120000)
public void testRemoveExcessRedundancyMapWhenDNRegisters() throws IOException,
    InterruptedException, TimeoutException {
  Configuration config = new HdfsConfiguration();
  // Bump up the redundancy recheck interval so it does not interfere
  // with the test.
  config.setInt(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY,
      10000);
  DataNodeFaultInjector oldInjector = DataNodeFaultInjector.get();

  final Semaphore semaphore = new Semaphore(0);
  try (MiniDFSCluster cluster =
      new MiniDFSCluster.Builder(config).numDataNodes(3).build()) {
    DistributedFileSystem fs = cluster.getFileSystem();
    BlockManager blockManager =
        cluster.getNameNode().getNamesystem().getBlockManager();
    cluster.waitActive();

    final DataNodeFaultInjector injector = new DataNodeFaultInjector() {
      @Override
      public void delayDeleteReplica() {
        // Hold up the replica deletion until the semaphore is released.
        try {
          semaphore.acquire(1);
        } catch (InterruptedException e) {
          // Ignore.
        }
      }
    };
    DataNodeFaultInjector.set(injector);

    // Create a file with replication 3.
    Path path = new Path("/testfile");
    DFSTestUtil.createFile(fs, path, 1024, (short) 3, 0);
    DFSTestUtil.waitReplication(fs, path, (short) 3);
    LocatedBlock lb = DFSTestUtil.getAllBlocks(fs, path).get(0);
    ExtendedBlock extendedBlock = lb.getBlock();
    DatanodeInfo[] loc = lb.getLocations();
    assertEquals(3, loc.length);

    // Lower replication to 2 so that one replica becomes excess.
    fs.setReplication(path, (short) 2);

    // Both excessRedundancyMap and invalidateBlocks should have size 1.
    assertEquals(1, blockManager.getExcessBlocksCount());
    assertEquals(1, blockManager.getPendingDeletionBlocksCount());
    DataNode excessDn = Arrays.stream(loc)
        .filter(datanodeInfo -> blockManager.getExcessSize4Testing(
            datanodeInfo.getDatanodeUuid()) > 0)
        .map(datanodeInfo -> cluster.getDataNode(datanodeInfo.getIpcPort()))
        .findFirst()
        .orElse(null);

    // Schedule the block for deletion on excessDn.
    assertEquals(1, blockManager.computeInvalidateWork(1));
    // excessRedundancyMap still has size 1.
    assertEquals(1, blockManager.getExcessBlocksCount());
    // invalidateBlocks is now empty.
    assertEquals(0, blockManager.getPendingDeletionBlocksCount());
    assertNotNull(excessDn);

    // The NameNode asks the DataNode to delete the replica in the
    // heartbeat response.
    cluster.triggerHeartbeats();

    // Wait until the DataNode has queued the deletion that was
    // asynchronously scheduled.
    DataNode finalExcessDn = excessDn;
    GenericTestUtils.waitFor(() -> cluster.getFsDatasetTestUtils(finalExcessDn)
        .getPendingAsyncDeletions() == 1, 100, 1000);

    // Restart the DataNode; the queued deletion is lost.
    int ipcPort = excessDn.getDatanodeId().getIpcPort();
    MiniDFSCluster.DataNodeProperties dataNodeProperties =
        cluster.stopDataNode(excessDn.getDatanodeId().getXferAddr());
    assertTrue(cluster.restartDataNode(dataNodeProperties, true));
    cluster.waitActive();
    semaphore.release(1);

    cluster.triggerHeartbeats();
    cluster.triggerBlockReports();

    // excessRedundancyMap still has size 1 and still includes excessDn.
    assertEquals(1,
        blockManager.getExcessSize4Testing(excessDn.getDatanodeUuid()));
    assertEquals(1, blockManager.getExcessBlocksCount());
    // invalidateBlocks is empty...
    assertEquals(0, blockManager.getPendingDeletionBlocksCount());
    // ...so no deletion work can be scheduled anymore.
    assertEquals(0, blockManager.computeInvalidateWork(1));

    // The replica still exists on excessDn.
    excessDn = cluster.getDataNode(ipcPort);
    assertNotNull(
        cluster.getFsDatasetTestUtils(excessDn).fetchReplica(extendedBlock));
    // No pending async deletions remain on excessDn.
    assertEquals(0,
        cluster.getFsDatasetTestUtils(excessDn).getPendingAsyncDeletions());

    // The block still reports 3 locations instead of the expected 2.
    assertEquals(3,
        DFSTestUtil.getAllBlocks(fs, path).get(0).getLocations().length);
  } finally {
    DataNodeFaultInjector.set(oldInjector);
  }
}
{code}
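
For reference, the method should drop into an existing BlockManager test class (e.g. {{TestBlockManager}} in hadoop-hdfs; the placement is my assumption) and can be run with {{mvn test -Dtest=TestBlockManager#testRemoveExcessRedundancyMapWhenDNRegisters}}. Its final assertions pass on current code and document the leak: the excess entry survives the DN restart and the block keeps 3 locations instead of the expected 2.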


> NameNode should remove its excess blocks from the ExcessRedundancyMap When a 
> DN registers
> -----------------------------------------------------------------------------------------
>
>                 Key: HDFS-17218
>                 URL: https://issues.apache.org/jira/browse/HDFS-17218
>             Project: Hadoop HDFS
>          Issue Type: Improvement
>          Components: namenode
>            Reporter: Haiyang Hu
>            Assignee: Haiyang Hu
>            Priority: Major
>
> Currently, a DN will lose all of its pending DNA_INVALIDATE blocks if it 
> restarts.
> *Root cause*
> The DN deletes replicas asynchronously, so it can hold many pending 
> deletion blocks in memory.
> When the DN restarts, these queued deletions are lost. This leaks entries 
> in the NameNode's excess redundancy map and results in many blocks having 
> more replicas than expected.
> *Solution*
> The NameNode should remove the DN's blocks from the ExcessRedundancyMap 
> when that DN registers.
> This ensures that when the DN's full block report is processed, 
> 'processExtraRedundancy' can be performed according to the actual state of 
> the blocks.
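
A minimal sketch of that approach (the hook point and the helper name are assumptions for illustration, not the committed patch):
{code:java}
// Hypothetical sketch, not the actual HDFS-17218 patch.
// When an already-known DataNode re-registers (i.e. it has restarted), any
// DNA_INVALIDATE work queued in its memory is gone, so the stale entries
// charged to it in the excess redundancy map should be dropped.
public void registerDatanode(DatanodeRegistration nodeReg) throws IOException {
  // ... existing registration handling in DatanodeManager ...

  // Assumed helper that clears this datanode's entries from the
  // ExcessRedundancyMap. The DN's next full block report then re-runs
  // processExtraRedundancy() and re-adds whatever is still truly excess.
  blockManager.removeExcessRedundancyMap(nodeReg.getDatanodeUuid());
}
{code}
With something like this in place, the reproduction above should no longer observe a stale excess entry after the restarted DN re-registers and sends its block report.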


