Repository: hadoop Updated Branches: refs/heads/branch-2 c58357939 -> c4cedfc1d
HDFS-7742. Favoring decommissioning node for replication can cause a block to stay underreplicated for long periods. Contributed by Nathan Roberts. (cherry picked from commit 04ee18ed48ceef34598f954ff40940abc9fde1d2) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/c4cedfc1 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/c4cedfc1 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/c4cedfc1 Branch: refs/heads/branch-2 Commit: c4cedfc1d601127430c70ca8ca4d4e2ee2d1003d Parents: c583579 Author: Kihwal Lee <kih...@apache.org> Authored: Mon Mar 30 10:11:25 2015 -0500 Committer: Kihwal Lee <kih...@apache.org> Committed: Mon Mar 30 10:11:25 2015 -0500 ---------------------------------------------------------------------- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++ .../server/blockmanagement/BlockManager.java | 10 ++--- .../blockmanagement/TestBlockManager.java | 42 ++++++++++++++++++++ 3 files changed, 50 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/c4cedfc1/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index abc3d9a..6cf5227 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -520,6 +520,9 @@ Release 2.7.0 - UNRELEASED HDFS-7410. Support CreateFlags with append() to support hsync() for appending streams (Vinayakumar B via Colin P. McCabe) + HDFS-7742. Favoring decommissioning node for replication can cause a block + to stay underreplicated for long periods (Nathan Roberts via kihwal) + OPTIMIZATIONS HDFS-7454. Reduce memory footprint for AclEntries in NameNode. http://git-wip-us.apache.org/repos/asf/hadoop/blob/c4cedfc1/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index 0ccd0bb..11965c1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -1640,7 +1640,8 @@ public class BlockManager { // If so, do not select the node as src node if ((nodesCorrupt != null) && nodesCorrupt.contains(node)) continue; - if(priority != UnderReplicatedBlocks.QUEUE_HIGHEST_PRIORITY + if(priority != UnderReplicatedBlocks.QUEUE_HIGHEST_PRIORITY + && !node.isDecommissionInProgress() && node.getNumberOfBlocksToBeReplicated() >= maxReplicationStreams) { continue; // already reached replication limit @@ -1655,13 +1656,12 @@ public class BlockManager { // never use already decommissioned nodes if(node.isDecommissioned()) continue; - // we prefer nodes that are in DECOMMISSION_INPROGRESS state - if(node.isDecommissionInProgress() || srcNode == null) { + + // We got this far, current node is a reasonable choice + if (srcNode == null) { srcNode = node; continue; } - if(srcNode.isDecommissionInProgress()) - continue; // switch to a different node randomly // this to prevent from deterministically selecting the same node even // if the node failed to replicate the block on previous iterations http://git-wip-us.apache.org/repos/asf/hadoop/blob/c4cedfc1/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java index 707c780..91abb2a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java @@ -535,6 +535,48 @@ public class TestBlockManager { } @Test + public void testFavorDecomUntilHardLimit() throws Exception { + bm.maxReplicationStreams = 0; + bm.replicationStreamsHardLimit = 1; + + long blockId = 42; // arbitrary + Block aBlock = new Block(blockId, 0, 0); + List<DatanodeDescriptor> origNodes = getNodes(0, 1); + // Add the block to the first node. + addBlockOnNodes(blockId,origNodes.subList(0,1)); + origNodes.get(0).startDecommission(); + + List<DatanodeDescriptor> cntNodes = new LinkedList<DatanodeDescriptor>(); + List<DatanodeStorageInfo> liveNodes = new LinkedList<DatanodeStorageInfo>(); + + assertNotNull("Chooses decommissioning source node for a normal replication" + + " if all available source nodes have reached their replication" + + " limits below the hard limit.", + bm.chooseSourceDatanode( + aBlock, + cntNodes, + liveNodes, + new NumberReplicas(), + UnderReplicatedBlocks.QUEUE_UNDER_REPLICATED)); + + + // Increase the replication count to test replication count > hard limit + DatanodeStorageInfo targets[] = { origNodes.get(1).getStorageInfos()[0] }; + origNodes.get(0).addBlockToBeReplicated(aBlock, targets); + + assertNull("Does not choose a source decommissioning node for a normal" + + " replication when all available nodes exceed the hard limit.", + bm.chooseSourceDatanode( + aBlock, + cntNodes, + liveNodes, + new NumberReplicas(), + UnderReplicatedBlocks.QUEUE_UNDER_REPLICATED)); + } + + + + @Test public void testSafeModeIBR() throws Exception { DatanodeDescriptor node = spy(nodes.get(0)); DatanodeStorageInfo ds = node.getStorageInfos()[0];