[ https://issues.apache.org/jira/browse/HDFS-16479?focusedWorklogId=755529&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-755529 ]
ASF GitHub Bot logged work on HDFS-16479: ----------------------------------------- Author: ASF GitHub Bot Created on: 12/Apr/22 02:01 Start Date: 12/Apr/22 02:01 Worklog Time Spent: 10m Work Description: tasanuma commented on code in PR #4138: URL: https://github.com/apache/hadoop/pull/4138#discussion_r847876025 ########## hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java: ########## @@ -852,6 +852,101 @@ public void testChooseSrcDNWithDupECInDecommissioningNode() throws Exception { 0, numReplicas.redundantInternalBlocks()); } + @Test + public void testSkipReconstructionWithManyBusyNodes() { + long blockId = -9223372036854775776L; // real ec block id + // RS-3-2 EC policy + ErasureCodingPolicy ecPolicy = + SystemErasureCodingPolicies.getPolicies().get(1); + + // striped blockInfo: 3 data blocks + 2 parity blocks + Block aBlock = new Block(blockId, ecPolicy.getCellSize() * ecPolicy.getNumDataUnits(), 0); + BlockInfoStriped aBlockInfoStriped = new BlockInfoStriped(aBlock, ecPolicy); + + // create 4 storageInfo, which means 1 block is missing + DatanodeStorageInfo ds1 = DFSTestUtil.createDatanodeStorageInfo( + "storage1", "1.1.1.1", "rack1", "host1"); + DatanodeStorageInfo ds2 = DFSTestUtil.createDatanodeStorageInfo( + "storage2", "2.2.2.2", "rack2", "host2"); + DatanodeStorageInfo ds3 = DFSTestUtil.createDatanodeStorageInfo( + "storage3", "3.3.3.3", "rack3", "host3"); + DatanodeStorageInfo ds4 = DFSTestUtil.createDatanodeStorageInfo( + "storage4", "4.4.4.4", "rack4", "host4"); + + // link block with storage + aBlockInfoStriped.addStorage(ds1, aBlock); + aBlockInfoStriped.addStorage(ds2, new Block(blockId + 1, 0, 0)); + aBlockInfoStriped.addStorage(ds3, new Block(blockId + 2, 0, 0)); + aBlockInfoStriped.addStorage(ds4, new Block(blockId + 3, 0, 0)); + + addEcBlockToBM(blockId, ecPolicy); + aBlockInfoStriped.setBlockCollectionId(mockINodeId); + + // reconstruction should be scheduled + 
BlockReconstructionWork work = bm.scheduleReconstruction(aBlockInfoStriped, 3); + assertNotNull(work); + + // simulate the 2 nodes reach maxReplicationStreams + for(int i = 0; i < bm.maxReplicationStreams; i++){ + ds3.getDatanodeDescriptor().incrementPendingReplicationWithoutTargets(); + ds4.getDatanodeDescriptor().incrementPendingReplicationWithoutTargets(); + } + + // reconstruction should be skipped since the number of non-busy nodes are not enough + work = bm.scheduleReconstruction(aBlockInfoStriped, 3); + assertNull(work); + } + + @Test + public void testSkipReconstructionWithManyBusyNodes2() { + long blockId = -9223372036854775776L; // real ec block id + // RS-3-2 EC policy + ErasureCodingPolicy ecPolicy = + SystemErasureCodingPolicies.getPolicies().get(1); + + // striped blockInfo: 2 data blocks + 2 paritys + Block aBlock = new Block(blockId, ecPolicy.getCellSize() * (ecPolicy.getNumDataUnits() - 1), 0); + BlockInfoStriped aBlockInfoStriped = new BlockInfoStriped(aBlock, ecPolicy); Review Comment: I updated the variable name. I want to keep the comment to clarify the difference between `testSkipReconstructionWithManyBusyNodes` and `testSkipReconstructionWithManyBusyNodes2`. 
Issue Time Tracking ------------------- Worklog Id: (was: 755529) Time Spent: 1.5h (was: 1h 20m) > EC: NameNode should not send a reconstruction work when the source datanodes > are insufficient > --------------------------------------------------------------------------------------------- > > Key: HDFS-16479 > URL: https://issues.apache.org/jira/browse/HDFS-16479 > Project: Hadoop HDFS > Issue Type: Bug > Components: ec, erasure-coding > Reporter: Yuanbo Liu > Priority: Critical > Labels: pull-request-available > Time Spent: 1.5h > Remaining Estimate: 0h > > We got this exception from DataNodes > {color:#707070}java.lang.IllegalArgumentException: No enough live striped > blocks.{color} > {color:#707070} at > com.google.common.base.Preconditions.checkArgument(Preconditions.java:141){color} > {color:#707070} at > org.apache.hadoop.hdfs.server.datanode.erasurecode.StripedReader.<init>(StripedReader.java:128){color} > {color:#707070} at > org.apache.hadoop.hdfs.server.datanode.erasurecode.StripedReconstructor.<init>(StripedReconstructor.java:135){color} > {color:#707070} at > org.apache.hadoop.hdfs.server.datanode.erasurecode.StripedBlockReconstructor.<init>(StripedBlockReconstructor.java:41){color} > {color:#707070} at > org.apache.hadoop.hdfs.server.datanode.erasurecode.ErasureCodingWorker.processErasureCodingTasks(ErasureCodingWorker.java:133){color} > {color:#707070} at > org.apache.hadoop.hdfs.server.datanode.BPOfferService.processCommandFromActive(BPOfferService.java:796){color} > {color:#707070} at > org.apache.hadoop.hdfs.server.datanode.BPOfferService.processCommandFromActor(BPOfferService.java:680){color} > {color:#707070} at > org.apache.hadoop.hdfs.server.datanode.BPServiceActor$CommandProcessingThread.processCommand(BPServiceActor.java:1314){color} > {color:#707070} at > org.apache.hadoop.hdfs.server.datanode.BPServiceActor$CommandProcessingThread.lambda$enqueue$2(BPServiceActor.java:1360){color} > {color:#707070} at > 
org.apache.hadoop.hdfs.server.datanode.BPServiceActor$CommandProcessingThread.processQueue(BPServiceActor.java:1287){color} > After going through the code of ErasureCodingWork.java, we found > {code:java} > targets[0].getDatanodeDescriptor().addBlockToBeErasureCoded( new > ExtendedBlock(blockPoolId, stripedBlk), getSrcNodes(), targets, > getLiveBlockIndicies(), stripedBlk.getErasureCodingPolicy()); > {code} > > the liveBusyBlockIndicies are not counted as liveBlockIndicies, hence > erasure coding reconstruction will sometimes fail with 'No enough live striped > blocks'. -- This message was sent by Atlassian Jira (v8.20.1#820001) --------------------------------------------------------------------- To unsubscribe, e-mail: hdfs-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: hdfs-issues-h...@hadoop.apache.org