[ https://issues.apache.org/jira/browse/HDFS-16479?focusedWorklogId=755009&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-755009 ]
ASF GitHub Bot logged work on HDFS-16479: ----------------------------------------- Author: ASF GitHub Bot Created on: 10/Apr/22 09:22 Start Date: 10/Apr/22 09:22 Worklog Time Spent: 10m Work Description: ayushtkn commented on code in PR #4138: URL: https://github.com/apache/hadoop/pull/4138#discussion_r846742614 ########## hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java: ########## @@ -2163,6 +2163,17 @@ BlockReconstructionWork scheduleReconstruction(BlockInfo block, return null; } + // skip if source datanodes for reconstructing ec block are not enough + if (block.isStriped()) { + BlockInfoStriped stripedBlock = (BlockInfoStriped) block; + int cellsNum = (int) ((stripedBlock.getNumBytes() - 1) / stripedBlock.getCellSize() + 1); + int minRequiredSources = Math.min(cellsNum, stripedBlock.getDataBlockNum()); + if (minRequiredSources > srcNodes.length) { + LOG.debug("Block {} cannot be reconstructed due to shortage of source datanodes ", block); + return null; Review Comment: Should we increment the metrics before returning ``null`` ``` NameNode.getNameNodeMetrics().incNumTimesReReplicationNotScheduled(); ``` ########## hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java: ########## @@ -2163,6 +2163,17 @@ BlockReconstructionWork scheduleReconstruction(BlockInfo block, return null; } + // skip if source datanodes for reconstructing ec block are not enough + if (block.isStriped()) { + BlockInfoStriped stripedBlock = (BlockInfoStriped) block; + int cellsNum = (int) ((stripedBlock.getNumBytes() - 1) / stripedBlock.getCellSize() + 1); + int minRequiredSources = Math.min(cellsNum, stripedBlock.getDataBlockNum()); Review Comment: Is this logic same as BlockInfoStriped.getRealDataBlockNum() can we use or extract the logic from there? 
or do some refactoring there, just trying if we can keep the logic at one place, in case there is some issue in the logic changing at one places fixes all the places.. ########## hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java: ########## @@ -852,6 +852,101 @@ public void testChooseSrcDNWithDupECInDecommissioningNode() throws Exception { 0, numReplicas.redundantInternalBlocks()); } + @Test + public void testSkipReconstructionWithManyBusyNodes() { + long blockId = -9223372036854775776L; // real ec block id + // RS-3-2 EC policy + ErasureCodingPolicy ecPolicy = + SystemErasureCodingPolicies.getPolicies().get(1); + + // striped blockInfo: 3 data blocks + 2 parity blocks + Block aBlock = new Block(blockId, ecPolicy.getCellSize() * ecPolicy.getNumDataUnits(), 0); + BlockInfoStriped aBlockInfoStriped = new BlockInfoStriped(aBlock, ecPolicy); + + // create 4 storageInfo, which means 1 block is missing + DatanodeStorageInfo ds1 = DFSTestUtil.createDatanodeStorageInfo( + "storage1", "1.1.1.1", "rack1", "host1"); + DatanodeStorageInfo ds2 = DFSTestUtil.createDatanodeStorageInfo( + "storage2", "2.2.2.2", "rack2", "host2"); + DatanodeStorageInfo ds3 = DFSTestUtil.createDatanodeStorageInfo( + "storage3", "3.3.3.3", "rack3", "host3"); + DatanodeStorageInfo ds4 = DFSTestUtil.createDatanodeStorageInfo( + "storage4", "4.4.4.4", "rack4", "host4"); + + // link block with storage + aBlockInfoStriped.addStorage(ds1, aBlock); + aBlockInfoStriped.addStorage(ds2, new Block(blockId + 1, 0, 0)); + aBlockInfoStriped.addStorage(ds3, new Block(blockId + 2, 0, 0)); + aBlockInfoStriped.addStorage(ds4, new Block(blockId + 3, 0, 0)); + + addEcBlockToBM(blockId, ecPolicy); + aBlockInfoStriped.setBlockCollectionId(mockINodeId); + + // reconstruction should be scheduled + BlockReconstructionWork work = bm.scheduleReconstruction(aBlockInfoStriped, 3); + assertNotNull(work); + + // simulate the 2 nodes reach maxReplicationStreams + 
for(int i = 0; i < bm.maxReplicationStreams; i++){ + ds3.getDatanodeDescriptor().incrementPendingReplicationWithoutTargets(); + ds4.getDatanodeDescriptor().incrementPendingReplicationWithoutTargets(); + } + + // reconstruction should be skipped since the number of non-busy nodes are not enough + work = bm.scheduleReconstruction(aBlockInfoStriped, 3); + assertNull(work); + } + + @Test + public void testSkipReconstructionWithManyBusyNodes2() { + long blockId = -9223372036854775776L; // real ec block id + // RS-3-2 EC policy + ErasureCodingPolicy ecPolicy = + SystemErasureCodingPolicies.getPolicies().get(1); + + // striped blockInfo: 2 data blocks + 2 paritys Review Comment: typo `paritys` ########## hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java: ########## @@ -852,6 +852,101 @@ public void testChooseSrcDNWithDupECInDecommissioningNode() throws Exception { 0, numReplicas.redundantInternalBlocks()); } + @Test + public void testSkipReconstructionWithManyBusyNodes() { + long blockId = -9223372036854775776L; // real ec block id + // RS-3-2 EC policy + ErasureCodingPolicy ecPolicy = + SystemErasureCodingPolicies.getPolicies().get(1); + + // striped blockInfo: 3 data blocks + 2 parity blocks + Block aBlock = new Block(blockId, ecPolicy.getCellSize() * ecPolicy.getNumDataUnits(), 0); + BlockInfoStriped aBlockInfoStriped = new BlockInfoStriped(aBlock, ecPolicy); + + // create 4 storageInfo, which means 1 block is missing + DatanodeStorageInfo ds1 = DFSTestUtil.createDatanodeStorageInfo( + "storage1", "1.1.1.1", "rack1", "host1"); + DatanodeStorageInfo ds2 = DFSTestUtil.createDatanodeStorageInfo( + "storage2", "2.2.2.2", "rack2", "host2"); + DatanodeStorageInfo ds3 = DFSTestUtil.createDatanodeStorageInfo( + "storage3", "3.3.3.3", "rack3", "host3"); + DatanodeStorageInfo ds4 = DFSTestUtil.createDatanodeStorageInfo( + "storage4", "4.4.4.4", "rack4", "host4"); + + // link block with storage + 
aBlockInfoStriped.addStorage(ds1, aBlock); + aBlockInfoStriped.addStorage(ds2, new Block(blockId + 1, 0, 0)); + aBlockInfoStriped.addStorage(ds3, new Block(blockId + 2, 0, 0)); + aBlockInfoStriped.addStorage(ds4, new Block(blockId + 3, 0, 0)); + + addEcBlockToBM(blockId, ecPolicy); + aBlockInfoStriped.setBlockCollectionId(mockINodeId); + + // reconstruction should be scheduled + BlockReconstructionWork work = bm.scheduleReconstruction(aBlockInfoStriped, 3); + assertNotNull(work); + + // simulate the 2 nodes reach maxReplicationStreams + for(int i = 0; i < bm.maxReplicationStreams; i++){ + ds3.getDatanodeDescriptor().incrementPendingReplicationWithoutTargets(); + ds4.getDatanodeDescriptor().incrementPendingReplicationWithoutTargets(); + } + + // reconstruction should be skipped since the number of non-busy nodes are not enough + work = bm.scheduleReconstruction(aBlockInfoStriped, 3); + assertNull(work); + } + + @Test + public void testSkipReconstructionWithManyBusyNodes2() { + long blockId = -9223372036854775776L; // real ec block id + // RS-3-2 EC policy + ErasureCodingPolicy ecPolicy = + SystemErasureCodingPolicies.getPolicies().get(1); + + // striped blockInfo: 2 data blocks + 2 paritys + Block aBlock = new Block(blockId, ecPolicy.getCellSize() * (ecPolicy.getNumDataUnits() - 1), 0); + BlockInfoStriped aBlockInfoStriped = new BlockInfoStriped(aBlock, ecPolicy); Review Comment: nit: Can you use a better variable name, couldn't decode what does `a` stands for, or drop a comment above. 
Issue Time Tracking ------------------- Worklog Id: (was: 755009) Time Spent: 1h 10m (was: 1h) > EC: NameNode should not send a reconstruction work when the source datanodes > are insufficient > --------------------------------------------------------------------------------------------- > > Key: HDFS-16479 > URL: https://issues.apache.org/jira/browse/HDFS-16479 > Project: Hadoop HDFS > Issue Type: Bug > Components: ec, erasure-coding > Reporter: Yuanbo Liu > Priority: Critical > Labels: pull-request-available > Time Spent: 1h 10m > Remaining Estimate: 0h > > We got this exception from DataNodes > {color:#707070}java.lang.IllegalArgumentException: No enough live striped > blocks.{color} > {color:#707070} at > com.google.common.base.Preconditions.checkArgument(Preconditions.java:141){color} > {color:#707070} at > org.apache.hadoop.hdfs.server.datanode.erasurecode.StripedReader.<init>(StripedReader.java:128){color} > {color:#707070} at > org.apache.hadoop.hdfs.server.datanode.erasurecode.StripedReconstructor.<init>(StripedReconstructor.java:135){color} > {color:#707070} at > org.apache.hadoop.hdfs.server.datanode.erasurecode.StripedBlockReconstructor.<init>(StripedBlockReconstructor.java:41){color} > {color:#707070} at > org.apache.hadoop.hdfs.server.datanode.erasurecode.ErasureCodingWorker.processErasureCodingTasks(ErasureCodingWorker.java:133){color} > {color:#707070} at > org.apache.hadoop.hdfs.server.datanode.BPOfferService.processCommandFromActive(BPOfferService.java:796){color} > {color:#707070} at > org.apache.hadoop.hdfs.server.datanode.BPOfferService.processCommandFromActor(BPOfferService.java:680){color} > {color:#707070} at > org.apache.hadoop.hdfs.server.datanode.BPServiceActor$CommandProcessingThread.processCommand(BPServiceActor.java:1314){color} > {color:#707070} at > org.apache.hadoop.hdfs.server.datanode.BPServiceActor$CommandProcessingThread.lambda$enqueue$2(BPServiceActor.java:1360){color} > {color:#707070} at > 
org.apache.hadoop.hdfs.server.datanode.BPServiceActor$CommandProcessingThread.processQueue(BPServiceActor.java:1287){color} > After going through the code of ErasureCodingWork.java, we found > {code:java} > targets[0].getDatanodeDescriptor().addBlockToBeErasureCoded( new > ExtendedBlock(blockPoolId, stripedBlk), getSrcNodes(), targets, > getLiveBlockIndicies(), stripedBlk.getErasureCodingPolicy()); > {code} > > the liveBusyBlockIndicies are not included in liveBlockIndicies, hence > erasure coding reconstruction will sometimes fail with 'No enough live striped > blocks'. -- This message was sent by Atlassian Jira (v8.20.1#820001) --------------------------------------------------------------------- To unsubscribe, e-mail: hdfs-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: hdfs-issues-h...@hadoop.apache.org