Author: wang Date: Wed Oct 9 21:30:08 2013 New Revision: 1530802 URL: http://svn.apache.org/r1530802 Log: HDFS-5304. Expose if a block replica is cached in getFileBlockLocations. (Contributed by Andrew Wang)
Modified: hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-4949.txt hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlock.java hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationManager.java hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/hdfs.proto hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCacheReplicationManager.java Modified: hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-4949.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-4949.txt?rev=1530802&r1=1530801&r2=1530802&view=diff ============================================================================== --- hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-4949.txt (original) +++ hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-4949.txt Wed Oct 9 21:30:08 2013 @@ -54,6 +54,9 @@ HDFS-4949 (Unreleased) HDFS-5190. Move cache pool related CLI commands to CacheAdmin. (Contributed by Andrew Wang) + HDFS-5304. Expose if a block replica is cached in getFileBlockLocations. + (Contributed by Andrew Wang) + OPTIMIZATIONS BUG FIXES Modified: hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlock.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlock.java?rev=1530802&r1=1530801&r2=1530802&view=diff ============================================================================== --- hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlock.java (original) +++ hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlock.java Wed Oct 9 21:30:08 2013 @@ -17,15 +17,21 @@ */ package org.apache.hadoop.hdfs.protocol; +import java.util.List; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; import org.apache.hadoop.security.token.Token; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; + /** * Associates a block with the Datanodes that contain its replicas * and other block metadata (E.g. the file offset associated with this - * block, whether it is corrupt, security token, etc). + * block, whether it is corrupt, a location is cached in memory, + * security token, etc). */ @InterfaceAudience.Private @InterfaceStability.Evolving @@ -39,9 +45,16 @@ public class LocatedBlock { // their locations are not part of this object private boolean corrupt; private Token<BlockTokenIdentifier> blockToken = new Token<BlockTokenIdentifier>(); + /** + * List of cached datanode locations + */ + private DatanodeInfo[] cachedLocs; + + // Used when there are no locations + private static final DatanodeInfo[] EMPTY_LOCS = new DatanodeInfo[0]; public LocatedBlock(ExtendedBlock b, DatanodeInfo[] locs) { - this(b, locs, -1, false); // startOffset is unknown + this(b, locs, -1); // startOffset is unknown } public LocatedBlock(ExtendedBlock b, DatanodeInfo[] locs, long startOffset) { @@ -50,14 +63,26 @@ public class LocatedBlock { public LocatedBlock(ExtendedBlock b, DatanodeInfo[] locs, long startOffset, boolean corrupt) { + this(b, locs, startOffset, corrupt, EMPTY_LOCS); + } + + public LocatedBlock(ExtendedBlock b, DatanodeInfo[] locs, long startOffset, + boolean corrupt, DatanodeInfo[] cachedLocs) { this.b = b; this.offset = startOffset; this.corrupt = corrupt; if (locs==null) { - this.locs = new DatanodeInfo[0]; + this.locs = EMPTY_LOCS; } else { this.locs = locs; } + Preconditions.checkArgument(cachedLocs != null, + "cachedLocs should not be null, use a different constructor"); + if (cachedLocs.length == 0) { + this.cachedLocs = EMPTY_LOCS; + } else { + this.cachedLocs = cachedLocs; + } } public Token<BlockTokenIdentifier> getBlockToken() { @@ -96,6 +121,36 @@ public class LocatedBlock { return this.corrupt; } + /** + * Add a the location of a cached replica of the block. + * + * @param loc of datanode with the cached replica + */ + public void addCachedLoc(DatanodeInfo loc) { + List<DatanodeInfo> cachedList = Lists.newArrayList(cachedLocs); + if (cachedList.contains(loc)) { + return; + } + // Try to re-use a DatanodeInfo already in loc + for (int i=0; i<locs.length; i++) { + if (locs[i].equals(loc)) { + cachedList.add(locs[i]); + cachedLocs = cachedList.toArray(cachedLocs); + return; + } + } + // Not present in loc, add it and go + cachedList.add(loc); + cachedLocs = cachedList.toArray(cachedLocs); + } + + /** + * @return Datanodes with a cached block replica + */ + public DatanodeInfo[] getCachedLocations() { + return cachedLocs; + } + @Override public String toString() { return getClass().getSimpleName() + "{" + b Modified: hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java?rev=1530802&r1=1530801&r2=1530802&view=diff ============================================================================== --- hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java (original) +++ hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java Wed Oct 9 21:30:08 2013 @@ -150,6 +150,7 @@ import org.apache.hadoop.security.proto. import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.DataChecksum; +import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.protobuf.ByteString; import com.google.protobuf.CodedInputStream; @@ -566,9 +567,21 @@ public class PBHelper { if (b == null) return null; Builder builder = LocatedBlockProto.newBuilder(); DatanodeInfo[] locs = b.getLocations(); + List<DatanodeInfo> cachedLocs = + Lists.newLinkedList(Arrays.asList(b.getCachedLocations())); for (int i = 0; i < locs.length; i++) { - builder.addLocs(i, PBHelper.convert(locs[i])); + DatanodeInfo loc = locs[i]; + builder.addLocs(i, PBHelper.convert(loc)); + boolean locIsCached = cachedLocs.contains(loc); + builder.addIsCached(locIsCached); + if (locIsCached) { + cachedLocs.remove(loc); + } } + Preconditions.checkArgument(cachedLocs.size() == 0, + "Found additional cached replica locations that are not in the set of" + + " storage-backed locations!"); + return builder.setB(PBHelper.convert(b.getBlock())) .setBlockToken(PBHelper.convert(b.getBlockToken())) .setCorrupt(b.isCorrupt()).setOffset(b.getStartOffset()).build(); @@ -581,9 +594,20 @@ public class PBHelper { for (int i = 0; i < locs.size(); i++) { targets[i] = PBHelper.convert(locs.get(i)); } + // Set values from the isCached list, re-using references from loc + List<DatanodeInfo> cachedLocs = new ArrayList<DatanodeInfo>(locs.size()); + List<Boolean> isCachedList = proto.getIsCachedList(); + for (int i=0; i<isCachedList.size(); i++) { + if (isCachedList.get(i)) { + cachedLocs.add(targets[i]); + } + } + LocatedBlock lb = new LocatedBlock(PBHelper.convert(proto.getB()), targets, - proto.getOffset(), proto.getCorrupt()); + proto.getOffset(), proto.getCorrupt(), + cachedLocs.toArray(new DatanodeInfo[0])); lb.setBlockToken(PBHelper.convert(proto.getBlockToken())); + return lb; } Modified: hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationManager.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationManager.java?rev=1530802&r1=1530801&r2=1530802&view=diff ============================================================================== --- hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationManager.java (original) +++ hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationManager.java Wed Oct 9 21:30:08 2013 @@ -35,6 +35,7 @@ import org.apache.hadoop.hdfs.protocol.B import org.apache.hadoop.hdfs.protocol.BlockListAsLongs; import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState; import org.apache.hadoop.hdfs.server.namenode.FSClusterStats; @@ -188,6 +189,14 @@ public class CacheReplicationManager ext return bc == null ? 0 : bc.getCacheReplication(); } + public void setCachedLocations(LocatedBlock block) { + BlockInfo blockInfo = cachedBlocksMap.getStoredBlock( + block.getBlock().getLocalBlock()); + for (int i=0; i<blockInfo.numNodes(); i++) { + block.addCachedLoc(blockInfo.getDatanode(i)); + } + } + /** * Returns the number of cached replicas of a block */ Modified: hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1530802&r1=1530801&r2=1530802&view=diff ============================================================================== --- hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original) +++ hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Wed Oct 9 21:30:08 2013 @@ -1442,6 +1442,10 @@ public class FSNamesystem implements Nam blockManager.getDatanodeManager().sortLocatedBlocks( clientMachine, lastBlockList); } + // Set caching information for the block list + for (LocatedBlock lb: blocks.getLocatedBlocks()) { + cacheReplicationManager.setCachedLocations(lb); + } } return blocks; } Modified: hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/hdfs.proto URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/hdfs.proto?rev=1530802&r1=1530801&r2=1530802&view=diff ============================================================================== --- hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/hdfs.proto (original) +++ hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/hdfs.proto Wed Oct 9 21:30:08 2013 @@ -128,6 +128,7 @@ message LocatedBlockProto { // their locations are not part of this object required hadoop.common.TokenProto blockToken = 5; + repeated bool isCached = 6 [packed=true]; // if a location in locs is cached } message DataEncryptionKeyProto { Modified: hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCacheReplicationManager.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCacheReplicationManager.java?rev=1530802&r1=1530801&r2=1530802&view=diff ============================================================================== --- hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCacheReplicationManager.java (original) +++ hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCacheReplicationManager.java Wed Oct 9 21:30:08 2013 @@ -32,6 +32,7 @@ import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystemTestHelper; +import org.apache.hadoop.fs.HdfsBlockLocation; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.permission.FsPermission; @@ -39,6 +40,7 @@ import org.apache.hadoop.hdfs.Distribute import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.protocol.CachePoolInfo; +import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.PathBasedCacheDescriptor; import org.apache.hadoop.hdfs.protocol.PathBasedCacheDirective; import org.apache.hadoop.hdfs.server.namenode.NameNode; @@ -160,6 +162,18 @@ public class TestCacheReplicationManager descriptor.getPool()); expected += numBlocksPerFile; waitForExpectedNumCachedBlocks(expected); + HdfsBlockLocation[] locations = + (HdfsBlockLocation[]) dfs.getFileBlockLocations( + new Path(paths.get(i)), 0, numBlocksPerFile * BLOCK_SIZE); + assertEquals("Unexpected number of locations", numBlocksPerFile, + locations.length); + for (HdfsBlockLocation loc: locations) { + assertEquals("Block should be present on all datanodes", + 3, loc.getHosts().length); + DatanodeInfo[] cachedLocs = loc.getLocatedBlock().getCachedLocations(); + assertEquals("Block should be cached on all datanodes", + loc.getHosts().length, cachedLocs.length); + } } // Uncache and check each path in sequence RemoteIterator<PathBasedCacheDescriptor> entries =