Author: wang
Date: Tue Dec 17 00:48:04 2013
New Revision: 1551427

URL: http://svn.apache.org/r1551427
Log:
HDFS-5350. Name Node should report fsimage transfer time as a metric. Contributed by Jimmy Xiang.

Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/GetImageServlet.java hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1551427&r1=1551426&r2=1551427&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Tue Dec 17 00:48:04 2013 @@ -156,6 +156,9 @@ Release 2.4.0 - UNRELEASED HDFS-5652. Refactor invalid block token exception handling in DFSInputStream. (Liang Xie via junping_du) + HDFS-5350. Name Node should report fsimage transfer time as a metric. + (Jimmy Xiang via wang) + OPTIMIZATIONS HDFS-5239. 
Allow FSNamesystem lock fairness to be configurable (daryn) Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/GetImageServlet.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/GetImageServlet.java?rev=1551427&r1=1551426&r2=1551427&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/GetImageServlet.java (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/GetImageServlet.java Tue Dec 17 00:48:04 2013 @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hdfs.server.namenode; +import static org.apache.hadoop.util.Time.now; + import java.security.PrivilegedExceptionAction; import java.util.*; import java.io.*; @@ -42,6 +44,7 @@ import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.server.common.JspHelper; import org.apache.hadoop.hdfs.server.common.Storage; import org.apache.hadoop.hdfs.server.common.StorageInfo; +import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog; import org.apache.hadoop.hdfs.util.DataTransferThrottler; import org.apache.hadoop.hdfs.util.MD5FileUtils; @@ -89,6 +92,7 @@ public class GetImageServlet extends Htt final GetImageParams parsedParams = new GetImageParams(request, response); final Configuration conf = (Configuration)getServletContext().getAttribute(JspHelper.CURRENT_CONF); + final NameNodeMetrics metrics = NameNode.getNameNodeMetrics(); if (UserGroupInformation.isSecurityEnabled() && !isValidRequestor(context, request.getUserPrincipal().getName(), conf)) { @@ -129,14 +133,26 @@ public class GetImageServlet extends Htt throw new 
IOException(errorMessage); } CheckpointFaultInjector.getInstance().beforeGetImageSetsHeaders(); + long start = now(); serveFile(imageFile); + + if (metrics != null) { // Metrics non-null only when used inside name node + long elapsed = now() - start; + metrics.addGetImage(elapsed); + } } else if (parsedParams.isGetEdit()) { long startTxId = parsedParams.getStartTxId(); long endTxId = parsedParams.getEndTxId(); File editFile = nnImage.getStorage() .findFinalizedEditsFile(startTxId, endTxId); + long start = now(); serveFile(editFile); + + if (metrics != null) { // Metrics non-null only when used inside name node + long elapsed = now() - start; + metrics.addGetEdit(elapsed); + } } else if (parsedParams.isPutImage()) { final long txid = parsedParams.getTxId(); @@ -160,12 +176,18 @@ public class GetImageServlet extends Htt UserGroupInformation.getCurrentUser().checkTGTAndReloginFromKeytab(); } + long start = now(); // issue a HTTP get request to download the new fsimage MD5Hash downloadImageDigest = TransferFsImage.downloadImageToStorage( parsedParams.getInfoServer(), txid, nnImage.getStorage(), true); nnImage.saveDigestAndRenameCheckpointImage(txid, downloadImageDigest); + + if (metrics != null) { // Metrics non-null only when used inside name node + long elapsed = now() - start; + metrics.addPutImage(elapsed); + } // Now that we have a new checkpoint, we might be able to // remove some old ones. 
Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java?rev=1551427&r1=1551426&r2=1551427&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java Tue Dec 17 00:48:04 2013 @@ -85,6 +85,13 @@ public class NameNodeMetrics { @Metric("Time loading FS Image at startup in msec") MutableGaugeInt fsImageLoadTime; + @Metric("GetImageServlet getEdit") + MutableRate getEdit; + @Metric("GetImageServlet getImage") + MutableRate getImage; + @Metric("GetImageServlet putImage") + MutableRate putImage; + NameNodeMetrics(String processName, String sessionId, int[] intervals) { registry.tag(ProcessName, processName).tag(SessionId, sessionId); @@ -232,4 +239,16 @@ public class NameNodeMetrics { public void setSafeModeTime(long elapsed) { safeModeTime.set((int) elapsed); } + + public void addGetEdit(long latency) { + getEdit.add(latency); + } + + public void addGetImage(long latency) { + getImage.add(latency); + } + + public void addPutImage(long latency) { + putImage.add(latency); + } } Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java?rev=1551427&r1=1551426&r2=1551427&view=diff 
============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java Tue Dec 17 00:48:04 2013 @@ -20,6 +20,9 @@ package org.apache.hadoop.hdfs.server.na import static org.apache.hadoop.hdfs.server.common.Util.fileAsURI; import static org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil.assertNNHasCheckpoints; import static org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil.getNameNodeCurrentDirs; +import static org.apache.hadoop.test.MetricsAsserts.assertCounterGt; +import static org.apache.hadoop.test.MetricsAsserts.assertGaugeGt; +import static org.apache.hadoop.test.MetricsAsserts.getMetrics; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; @@ -74,6 +77,7 @@ import org.apache.hadoop.hdfs.server.pro import org.apache.hadoop.hdfs.tools.DFSAdmin; import org.apache.hadoop.io.Text; import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.metrics2.MetricsRecordBuilder; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.GenericTestUtils.DelayAnswer; import org.apache.hadoop.test.GenericTestUtils.LogCapturer; @@ -107,6 +111,7 @@ public class TestCheckpoint { } static final Log LOG = LogFactory.getLog(TestCheckpoint.class); + static final String NN_METRICS = "NameNodeActivity"; static final long seed = 0xDEADBEEFL; static final int blockSize = 4096; @@ -1055,6 +1060,14 @@ public class TestCheckpoint { // secondary = startSecondaryNameNode(conf); secondary.doCheckpoint(); + + MetricsRecordBuilder rb = getMetrics(NN_METRICS); + assertCounterGt("GetImageNumOps", 0, rb); + assertCounterGt("GetEditNumOps", 0, rb); + assertCounterGt("PutImageNumOps", 0, 
rb); + assertGaugeGt("GetImageAvgTime", 0.0, rb); + assertGaugeGt("GetEditAvgTime", 0.0, rb); + assertGaugeGt("PutImageAvgTime", 0.0, rb); } finally { fileSys.close(); cleanup(secondary);