Repository: hadoop
Updated Branches:
refs/heads/branch-2.7 ccd24f091 -> 5a714fee4
HDFS-11634. Optimize BlockIterator when iterating starts in the middle.
Contributed by Konstantin V Shvachko.
(cherry picked from commit 8dfcd95d580bb090af7f40af0a57061518c18c8c)
Conflicts:
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestGetBlocks.java
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/5a714fee
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/5a714fee
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/5a714fee
Branch: refs/heads/branch-2.7
Commit: 5a714fee4376821c9ce1a5e00c34b3acc25240e7
Parents: ccd24f0
Author: Konstantin V Shvachko <[email protected]>
Authored: Mon Apr 17 15:04:06 2017 -0700
Committer: Konstantin V Shvachko <[email protected]>
Committed: Mon Apr 17 17:39:29 2017 -0700
----------------------------------------------------------------------
.../server/blockmanagement/BlockManager.java | 7 +-
.../blockmanagement/DatanodeDescriptor.java | 33 +++++++--
.../org/apache/hadoop/hdfs/TestGetBlocks.java | 78 ++++++++++++++++++--
.../blockmanagement/BlockManagerTestUtil.java | 16 ++++
4 files changed, 115 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/5a714fee/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
----------------------------------------------------------------------
diff --git
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index cc6c881..85f9201 100644
---
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -1060,12 +1060,9 @@ public class BlockManager {
if(numBlocks == 0) {
return new BlocksWithLocations(new BlockWithLocations[0]);
}
- Iterator<BlockInfoContiguous> iter = node.getBlockIterator();
+ // starting from a random block
int startBlock = DFSUtil.getRandom().nextInt(numBlocks); // starting from
a random block
- // skip blocks
- for(int i=0; i<startBlock; i++) {
- iter.next();
- }
+ Iterator<BlockInfoContiguous> iter = node.getBlockIterator(startBlock);
List<BlockWithLocations> results = new ArrayList<BlockWithLocations>();
long totalSize = 0;
BlockInfoContiguous curBlock;
http://git-wip-us.apache.org/repos/asf/hadoop/blob/5a714fee/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
----------------------------------------------------------------------
diff --git
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
index 2ec5678..02b8eb9 100644
---
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
+++
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
@@ -493,18 +493,35 @@ public class DatanodeDescriptor extends DatanodeInfo {
private int index = 0;
private final List<Iterator<BlockInfoContiguous>> iterators;
- private BlockIterator(final DatanodeStorageInfo... storages) {
+ private BlockIterator(final int startBlock,
+ final DatanodeStorageInfo... storages) {
+ if(startBlock < 0) {
+ throw new IllegalArgumentException(
+ "Illegal value startBlock = " + startBlock);
+ }
List<Iterator<BlockInfoContiguous>> iterators = new
ArrayList<Iterator<BlockInfoContiguous>>();
+ int s = startBlock;
+ int sumBlocks = 0;
for (DatanodeStorageInfo e : storages) {
- iterators.add(e.getBlockIterator());
+ int numBlocks = e.numBlocks();
+ sumBlocks += numBlocks;
+ if(sumBlocks <= startBlock) {
+ s -= numBlocks;
+ } else {
+ iterators.add(e.getBlockIterator());
+ }
}
this.iterators = Collections.unmodifiableList(iterators);
+ // skip to the storage containing startBlock
+ for(; s > 0 && hasNext(); s--) {
+ next();
+ }
}
@Override
public boolean hasNext() {
update();
- return !iterators.isEmpty() && iterators.get(index).hasNext();
+ return index < iterators.size() && iterators.get(index).hasNext();
}
@Override
@@ -526,10 +543,14 @@ public class DatanodeDescriptor extends DatanodeInfo {
}
Iterator<BlockInfoContiguous> getBlockIterator() {
- return new BlockIterator(getStorageInfos());
+ return getBlockIterator(0);
}
- Iterator<BlockInfoContiguous> getBlockIterator(final String storageID) {
- return new BlockIterator(getStorageInfo(storageID));
+
+ /**
+ * Get iterator, which starts iterating from the specified block.
+ */
+ Iterator<BlockInfoContiguous> getBlockIterator(final int startBlock) {
+ return new BlockIterator(startBlock, getStorageInfos());
}
void incrementPendingReplicationWithoutTargets() {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/5a714fee/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestGetBlocks.java
----------------------------------------------------------------------
diff --git
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestGetBlocks.java
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestGetBlocks.java
index f36d0ec..fae1def 100644
---
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestGetBlocks.java
+++
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestGetBlocks.java
@@ -22,6 +22,7 @@ import static org.junit.Assert.*;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.HashMap;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
@@ -37,15 +38,18 @@ import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoContiguous;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
+import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
import org.apache.hadoop.hdfs.server.common.GenerationStamp;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import
org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations.BlockWithLocations;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
import org.apache.hadoop.ipc.RemoteException;
-import org.apache.hadoop.util.Time;
import org.junit.Test;
/**
@@ -184,8 +188,10 @@ public class TestGetBlocks {
final Random r = new Random();
CONF.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, DEFAULT_BLOCK_SIZE);
- MiniDFSCluster cluster = new MiniDFSCluster.Builder(CONF).numDataNodes(
- REPLICATION_FACTOR).build();
+ MiniDFSCluster cluster = new MiniDFSCluster.Builder(CONF)
+ .numDataNodes(REPLICATION_FACTOR)
+ .storagesPerDatanode(4)
+ .build();
try {
cluster.waitActive();
@@ -194,7 +200,7 @@ public class TestGetBlocks {
FSDataOutputStream out = fs.create(new Path("/tmp.txt"),
REPLICATION_FACTOR);
byte[] data = new byte[1024];
- long fileLen = 2 * DEFAULT_BLOCK_SIZE;
+ long fileLen = 12 * DEFAULT_BLOCK_SIZE;
long bytesToWrite = fileLen;
while (bytesToWrite > 0) {
r.nextBytes(data);
@@ -209,12 +215,12 @@ public class TestGetBlocks {
List<LocatedBlock> locatedBlocks;
DatanodeInfo[] dataNodes = null;
boolean notWritten;
+ final DFSClient dfsclient = new DFSClient(NameNode.getAddress(CONF),
+ CONF);
do {
- final DFSClient dfsclient = new DFSClient(NameNode.getAddress(CONF),
- CONF);
locatedBlocks = dfsclient.getNamenode()
.getBlockLocations("/tmp.txt", 0, fileLen).getLocatedBlocks();
- assertEquals(2, locatedBlocks.size());
+ assertEquals(12, locatedBlocks.size());
notWritten = false;
for (int i = 0; i < 2; i++) {
dataNodes = locatedBlocks.get(i).getLocations();
@@ -228,6 +234,7 @@ public class TestGetBlocks {
}
}
} while (notWritten);
+ dfsclient.close();
// get RPC client to namenode
InetSocketAddress addr = new InetSocketAddress("localhost",
@@ -238,7 +245,7 @@ public class TestGetBlocks {
// get blocks of size fileLen from dataNodes[0]
BlockWithLocations[] locs;
locs = namenode.getBlocks(dataNodes[0], fileLen).getBlocks();
- assertEquals(locs.length, 2);
+ assertEquals(locs.length, 12);
assertEquals(locs[0].getStorageIDs().length, 2);
assertEquals(locs[1].getStorageIDs().length, 2);
@@ -261,6 +268,8 @@ public class TestGetBlocks {
// get blocks of size BlockSize from a non-existent datanode
DatanodeInfo info = DFSTestUtil.getDatanodeInfo("1.2.3.4");
getBlocksWithException(namenode, info, 2);
+
+ testBlockIterator(cluster);
} finally {
cluster.shutdown();
}
@@ -278,6 +287,59 @@ public class TestGetBlocks {
assertTrue(getException);
}
+ /**
+ * BlockIterator iterates over all blocks belonging to DatanodeDescriptor
+ * through multiple storages.
+ * The test verifies that BlockIterator can be set to start iterating from
+ * a particular starting block index.
+ */
+ void testBlockIterator(MiniDFSCluster cluster) {
+ FSNamesystem ns = cluster.getNamesystem();
+ String dId = cluster.getDataNodes().get(0).getDatanodeUuid();
+ DatanodeDescriptor dnd = BlockManagerTestUtil.getDatanode(ns, dId);
+ DatanodeStorageInfo[] storages = dnd.getStorageInfos();
+ assertEquals("DataNode should have 4 storages", 4, storages.length);
+
+ Iterator<BlockInfoContiguous> dnBlockIt = null;
+ // check illegal start block number
+ try {
+ dnBlockIt = BlockManagerTestUtil.getBlockIterator(
+ cluster.getNamesystem(), dId, -1);
+ assertTrue("Should throw IllegalArgumentException", false);
+ } catch(IllegalArgumentException ei) {
+ // as expected
+ }
+ assertNull("Iterator should be null", dnBlockIt);
+
+ // form an array of all DataNode blocks
+ int numBlocks = dnd.numBlocks();
+ BlockInfoContiguous[] allBlocks = new BlockInfoContiguous[numBlocks];
+ int idx = 0;
+ for(DatanodeStorageInfo s : storages) {
+ Iterator<BlockInfoContiguous> storageBlockIt =
+ BlockManagerTestUtil.getBlockIterator(s);
+ while(storageBlockIt.hasNext()) {
+ allBlocks[idx++] = storageBlockIt.next();
+ }
+ }
+
+ // check iterator for every block as a starting point
+ for(int i = 0; i < allBlocks.length; i++) {
+ // create iterator starting from i
+ dnBlockIt = BlockManagerTestUtil.getBlockIterator(ns, dId, i);
+ assertTrue("Block iterator should have next block", dnBlockIt.hasNext());
+ // check iterator lists blocks in the desired order
+ for(int j = i; j < allBlocks.length; j++) {
+ assertEquals("Wrong block order", allBlocks[j], dnBlockIt.next());
+ }
+ }
+
+ // check start block number larger than numBlocks in the DataNode
+ dnBlockIt = BlockManagerTestUtil.getBlockIterator(
+ ns, dId, allBlocks.length + 1);
+ assertFalse("Iterator should not have next block", dnBlockIt.hasNext());
+ }
+
@Test
public void testBlockKey() {
Map<Block, Long> map = new HashMap<Block, Long>();
http://git-wip-us.apache.org/repos/asf/hadoop/blob/5a714fee/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java
----------------------------------------------------------------------
diff --git
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java
index 2f48b91..a67d245 100644
---
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java
+++
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
+import java.util.Iterator;
import java.util.Set;
import java.util.concurrent.ExecutionException;
@@ -53,6 +54,21 @@ public class BlockManagerTestUtil {
}
}
+ public static Iterator<BlockInfoContiguous> getBlockIterator(final
FSNamesystem ns,
+ final String storageID, final int startBlock) {
+ ns.readLock();
+ try {
+ DatanodeDescriptor dn =
+ ns.getBlockManager().getDatanodeManager().getDatanode(storageID);
+ return dn.getBlockIterator(startBlock);
+ } finally {
+ ns.readUnlock();
+ }
+ }
+
+ public static Iterator<BlockInfoContiguous>
getBlockIterator(DatanodeStorageInfo s) {
+ return s.getBlockIterator();
+ }
/**
* Refresh block queue counts on the name-node.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]