Repository: hadoop Updated Branches: refs/heads/branch-2 7dfcf4035 -> 440379a0d
HDFS-9549. TestCacheDirectives#testExceedsCapacity is flaky (Xiao Chen via cmccabe) (cherry picked from commit 211c78c09073e5b34db309b49d8de939a7a812f5) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/440379a0 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/440379a0 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/440379a0 Branch: refs/heads/branch-2 Commit: 440379a0d4cafd2641a666b954ebc04ba0353f13 Parents: 7dfcf40 Author: Colin Patrick Mccabe <[email protected]> Authored: Tue Feb 23 12:01:20 2016 -0800 Committer: Colin Patrick Mccabe <[email protected]> Committed: Tue Feb 23 12:05:53 2016 -0800 ---------------------------------------------------------------------- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../CacheReplicationMonitor.java | 27 +++++++++++++++++--- .../server/blockmanagement/DatanodeManager.java | 9 +++++++ .../server/namenode/TestCacheDirectives.java | 7 +---- 4 files changed, 37 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/440379a0/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 8da6ed2..6af723f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -1886,6 +1886,9 @@ Release 2.8.0 - UNRELEASED HDFS-9842. dfs.datanode.balance.bandwidthPerSec should accept friendly size units. (Lin Yiqun via Arpit Agarwal) + HDFS-9549. TestCacheDirectives#testExceedsCapacity is flaky (Xiao Chen via + cmccabe) + Release 2.7.3 - UNRELEASED INCOMPATIBLE CHANGES http://git-wip-us.apache.org/repos/asf/hadoop/blob/440379a0/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java index 2f81ddf..87cd716 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java @@ -27,6 +27,7 @@ import java.util.Date; import java.util.Iterator; import java.util.LinkedList; import java.util.List; +import java.util.Set; import java.util.Random; import java.util.TreeMap; import java.util.concurrent.TimeUnit; @@ -491,6 +492,26 @@ public class CacheReplicationMonitor extends Thread implements Closeable { * Blocks that are over-replicated should be removed from Datanodes. */ private void rescanCachedBlockMap() { + // Remove pendingCached blocks that will make DN out-of-capacity. + Set<DatanodeDescriptor> datanodes = + blockManager.getDatanodeManager().getDatanodes(); + for (DatanodeDescriptor dn : datanodes) { + long remaining = dn.getCacheRemaining(); + for (Iterator<CachedBlock> it = dn.getPendingCached().iterator(); + it.hasNext();) { + CachedBlock cblock = it.next(); + BlockInfo blockInfo = blockManager. + getStoredBlock(new Block(cblock.getBlockId())); + if (blockInfo.getNumBytes() > remaining) { + LOG.debug("Block {}: removing from PENDING_CACHED for node {} " + + "because it cannot fit in remaining cache size {}.", + cblock.getBlockId(), dn.getDatanodeUuid(), remaining); + it.remove(); + } else { + remaining -= blockInfo.getNumBytes(); + } + } + } for (Iterator<CachedBlock> cbIter = cachedBlocks.iterator(); cbIter.hasNext(); ) { scannedBlocks++; @@ -531,7 +552,7 @@ public class CacheReplicationMonitor extends Thread implements Closeable { DatanodeDescriptor datanode = iter.next(); datanode.getPendingCached().remove(cblock); iter.remove(); - LOG.trace("Block {}: removing from PENDING_CACHED for node {}" + LOG.trace("Block {}: removing from PENDING_CACHED for node {} " + "because we already have {} cached replicas and we only" + " need {}", cblock.getBlockId(), datanode.getDatanodeUuid(), numCached, @@ -686,8 +707,8 @@ public class CacheReplicationMonitor extends Thread implements Closeable { long pendingCapacity = pendingBytes + datanode.getCacheRemaining(); if (pendingCapacity < blockInfo.getNumBytes()) { LOG.trace("Block {}: DataNode {} is not a valid possibility " + - "because the block has size {}, but the DataNode only has {}" + - "bytes of cache remaining ({} pending bytes, {} already cached.", + "because the block has size {}, but the DataNode only has {} " + + "bytes of cache remaining ({} pending bytes, {} already cached.)", blockInfo.getBlockId(), datanode.getDatanodeUuid(), blockInfo.getNumBytes(), pendingCapacity, pendingBytes, datanode.getCacheRemaining()); http://git-wip-us.apache.org/repos/asf/hadoop/blob/440379a0/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java index d302b9b..b41afdb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java @@ -410,6 +410,15 @@ public class DatanodeManager { return host2DatanodeMap.getDatanodeByXferAddr(host, xferPort); } + /** @return the datanode descriptors for all nodes. */ + public Set<DatanodeDescriptor> getDatanodes() { + final Set<DatanodeDescriptor> datanodes; + synchronized (this) { + datanodes = new HashSet<>(datanodeMap.values()); + } + return datanodes; + } + /** @return the Host2NodesMap */ public Host2NodesMap getHost2DatanodeMap() { return this.host2DatanodeMap; http://git-wip-us.apache.org/repos/asf/hadoop/blob/440379a0/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java index d6f526b..588f209 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java @@ -73,7 +73,6 @@ import org.apache.hadoop.hdfs.protocol.CachePoolStats; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; -import org.apache.hadoop.hdfs.server.blockmanagement.CacheReplicationMonitor; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.CachedBlocksList.Type; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager; @@ -87,9 +86,6 @@ import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.GSet; -import org.apache.log4j.Level; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -1413,6 +1409,7 @@ public class TestCacheDirectives { */ private void checkPendingCachedEmpty(MiniDFSCluster cluster) throws Exception { + Thread.sleep(1000); cluster.getNamesystem().readLock(); try { final DatanodeManager datanodeManager = @@ -1444,7 +1441,6 @@ public class TestCacheDirectives { waitForCachedBlocks(namenode, -1, numCachedReplicas, "testExceeds:1"); checkPendingCachedEmpty(cluster); - Thread.sleep(1000); checkPendingCachedEmpty(cluster); // Try creating a file with giant-sized blocks that exceed cache capacity @@ -1452,7 +1448,6 @@ public class TestCacheDirectives { DFSTestUtil.createFile(dfs, fileName, 4096, fileLen, CACHE_CAPACITY * 2, (short) 1, 0xFADED); checkPendingCachedEmpty(cluster); - Thread.sleep(1000); checkPendingCachedEmpty(cluster); }
