HDFS-10341. Add a metric to expose the number of timed-out pending replication blocks. (Contributed by Akira Ajisaka)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/97e24494 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/97e24494 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/97e24494 Branch: refs/heads/HDFS-1312 Commit: 97e244947719d483c3f80521a00fec8e13dcb637 Parents: 1df6f57 Author: Arpit Agarwal <a...@apache.org> Authored: Thu Jun 2 13:14:45 2016 -0700 Committer: Arpit Agarwal <a...@apache.org> Committed: Thu Jun 2 13:14:45 2016 -0700 ---------------------------------------------------------------------- .../hadoop-common/src/site/markdown/Metrics.md | 1 + .../server/blockmanagement/BlockManager.java | 4 ++++ .../PendingReconstructionBlocks.java | 16 +++++++++++++++- .../hdfs/server/namenode/FSNamesystem.java | 5 +++++ .../TestPendingReconstruction.java | 20 ++++++++++++++------ 5 files changed, 39 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/97e24494/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md index 699316f..e4e2443 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md @@ -218,6 +218,7 @@ Each metrics record contains tags such as HAState and Hostname as additional inf | `TotalSyncCount` | Total number of sync operations performed by edit log | | `TotalSyncTimes` | Total number of milliseconds spent by various edit logs in sync operation| | `NameDirSize` | NameNode name directories size in bytes | +| `NumTimedOutPendingReconstructions` | The number of timed out reconstructions. Not the number of unique blocks that timed out. 
| JournalNode ----------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/97e24494/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index ed57a86..1a76e09 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -198,6 +198,10 @@ public class BlockManager implements BlockStatsMXBean { public int getPendingDataNodeMessageCount() { return pendingDNMessages.count(); } + /** Used by metrics. */ + public long getNumTimedOutPendingReconstructions() { + return pendingReconstruction.getNumTimedOuts(); + } /**replicationRecheckInterval is how often namenode checks for new replication work*/ private final long replicationRecheckInterval; http://git-wip-us.apache.org/repos/asf/hadoop/blob/97e24494/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReconstructionBlocks.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReconstructionBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReconstructionBlocks.java index 528199c..956e94f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReconstructionBlocks.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReconstructionBlocks.java @@ -50,6 +50,7 @@ class PendingReconstructionBlocks { private final ArrayList<BlockInfo> timedOutItems; Daemon timerThread = null; private volatile boolean fsRunning = true; + private long timedOutCount = 0L; // // It might take anywhere between 5 to 10 minutes before @@ -125,6 +126,7 @@ class PendingReconstructionBlocks { synchronized (pendingReconstructions) { pendingReconstructions.clear(); timedOutItems.clear(); + timedOutCount = 0L; } } @@ -149,6 +151,16 @@ class PendingReconstructionBlocks { } /** + * Used for metrics. + * @return The number of timeouts + */ + long getNumTimedOuts() { + synchronized (timedOutItems) { + return timedOutCount + timedOutItems.size(); + } + } + + /** * Returns a list of blocks that have timed out their * reconstruction requests. Returns null if no blocks have * timed out. @@ -158,9 +170,11 @@ class PendingReconstructionBlocks { if (timedOutItems.size() <= 0) { return null; } + int size = timedOutItems.size(); BlockInfo[] blockList = timedOutItems.toArray( - new BlockInfo[timedOutItems.size()]); + new BlockInfo[size]); timedOutItems.clear(); + timedOutCount += size; return blockList; } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/97e24494/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index a2df6d2..c9f2487 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -4507,6 +4507,11 @@ public 
class FSNamesystem implements Namesystem, FSNamesystemMBean, public long getExcessBlocks() { return blockManager.getExcessBlocksCount(); } + + @Metric + public long getNumTimedOutPendingReconstructions() { + return blockManager.getNumTimedOutPendingReconstructions(); + } // HA-only metric @Metric http://git-wip-us.apache.org/repos/asf/hadoop/blob/97e24494/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReconstruction.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReconstruction.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReconstruction.java index d07c657..c30f630 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReconstruction.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingReconstruction.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hdfs.server.blockmanagement; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import java.util.ArrayList; @@ -117,14 +119,15 @@ public class TestPendingReconstruction { // // verify that nothing has timed out so far // - assertTrue(pendingReconstructions.getTimedOutBlocks() == null); + assertNull(pendingReconstructions.getTimedOutBlocks()); + assertEquals(0L, pendingReconstructions.getNumTimedOuts()); // // Wait for one second and then insert some more items. 
// try { Thread.sleep(1000); - } catch (Exception e) { + } catch (Exception ignored) { } for (int i = 10; i < 15; i++) { @@ -133,7 +136,8 @@ public class TestPendingReconstruction { DatanodeStorageInfo.toDatanodeDescriptors( DFSTestUtil.createDatanodeStorageInfos(i))); } - assertTrue(pendingReconstructions.size() == 15); + assertEquals(15, pendingReconstructions.size()); + assertEquals(0L, pendingReconstructions.getNumTimedOuts()); // // Wait for everything to timeout. @@ -153,10 +157,14 @@ public class TestPendingReconstruction { // Verify that everything has timed out. // assertEquals("Size of pendingReconstructions ", 0, pendingReconstructions.size()); + assertEquals(15L, pendingReconstructions.getNumTimedOuts()); Block[] timedOut = pendingReconstructions.getTimedOutBlocks(); - assertTrue(timedOut != null && timedOut.length == 15); - for (int i = 0; i < timedOut.length; i++) { - assertTrue(timedOut[i].getBlockId() < 15); + assertNotNull(timedOut); + assertEquals(15, timedOut.length); + // Verify the number is not reset + assertEquals(15L, pendingReconstructions.getNumTimedOuts()); + for (Block block : timedOut) { + assertTrue(block.getBlockId() < 15); } pendingReconstructions.stop(); } --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org