HDFS-8410. Add computation time metrics to datanode for ECWorker. Contributed by SammiChen.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/61e30cf8 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/61e30cf8 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/61e30cf8 Branch: refs/heads/YARN-4752 Commit: 61e30cf83ca78529603d9b4c6732418da7e4d0c8 Parents: ae8bccd Author: Andrew Wang <w...@apache.org> Authored: Fri Oct 21 13:12:35 2016 -0700 Committer: Andrew Wang <w...@apache.org> Committed: Fri Oct 21 13:12:41 2016 -0700 ---------------------------------------------------------------------- .../erasurecode/StripedBlockReconstructor.java | 3 ++ .../datanode/metrics/DataNodeMetrics.java | 13 +++++- .../TestDataNodeErasureCodingMetrics.java | 43 +++++++++++++------- 3 files changed, 43 insertions(+), 16 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/61e30cf8/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockReconstructor.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockReconstructor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockReconstructor.java index 9f9f15d..a8e9d30 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockReconstructor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockReconstructor.java @@ -103,7 +103,10 @@ class StripedBlockReconstructor extends StripedReconstructor int[] erasedIndices = stripedWriter.getRealTargetIndices(); ByteBuffer[] outputs = stripedWriter.getRealTargetBuffers(toReconstructLen); + long start = System.nanoTime(); getDecoder().decode(inputs, erasedIndices, outputs); + long end = System.nanoTime(); + this.getDatanode().getMetrics().incrECDecodingTime(end - start); stripedWriter.updateRealTargetBuffers(toReconstructLen); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/61e30cf8/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java index dc12787..23e15a2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdfs.server.datanode.metrics; import static org.apache.hadoop.metrics2.impl.MsInfo.SessionId; +import static org.apache.hadoop.metrics2.lib.Interns.info; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; @@ -134,6 +135,8 @@ public class DataNodeMetrics { MutableCounterLong ecReconstructionTasks; @Metric("Count of erasure coding failed reconstruction tasks") MutableCounterLong ecFailedReconstructionTasks; + // Nanoseconds spent by decoding tasks. + MutableCounterLong ecDecodingTimeNanos; final MetricsRegistry registry = new MetricsRegistry("datanode"); final String name; @@ -153,7 +156,10 @@ public class DataNodeMetrics { sendDataPacketTransferNanosQuantiles = new MutableQuantiles[len]; ramDiskBlocksEvictionWindowMsQuantiles = new MutableQuantiles[len]; ramDiskBlocksLazyPersistWindowMsQuantiles = new MutableQuantiles[len]; - + ecDecodingTimeNanos = registry.newCounter( + info("ecDecodingTimeNanos", "Nanoseconds spent by decoding tasks"), + (long) 0); + for (int i = 0; i < len; i++) { int interval = intervals[i]; packetAckRoundTripTimeNanosQuantiles[i] = registry.newQuantiles( @@ -442,7 +448,10 @@ public class DataNodeMetrics { } public void setDataNodeActiveXceiversCount(int value) { - this.dataNodeActiveXceiversCount.set(value); + dataNodeActiveXceiversCount.set(value); } + public void incrECDecodingTime(long decodingTimeNanos) { + ecDecodingTimeNanos.incr(decodingTimeNanos); + } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/61e30cf8/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeErasureCodingMetrics.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeErasureCodingMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeErasureCodingMetrics.java index 919fb72..825aa5a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeErasureCodingMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeErasureCodingMetrics.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hdfs.server.datanode; +import com.google.common.base.Supplier; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -42,14 +43,15 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; +import org.apache.hadoop.test.GenericTestUtils; import org.junit.After; +import org.junit.Assert; import org.junit.Before; import org.junit.Test; import java.io.IOException; import java.util.Arrays; - /** * This file tests the erasure coding metrics in DataNode. */ @@ -94,24 +96,37 @@ public class TestDataNodeErasureCodingMetrics { DataNode workerDn = doTest("/testEcTasks"); MetricsRecordBuilder rb = getMetrics(workerDn.getMetrics().name()); - // EcReconstructionTasks metric value will be updated in the finally block - // of striped reconstruction thread. Here, giving a grace period to finish - // EC reconstruction metric updates in DN. - LOG.info("Waiting to finish EC reconstruction metric updates in DN"); - int retries = 0; - while (retries < 20) { - long taskMetricValue = getLongCounter("EcReconstructionTasks", rb); - if (taskMetricValue > 0) { - break; + // Ensure that reconstruction task is finished + GenericTestUtils.waitFor(new Supplier<Boolean>() { + @Override + public Boolean get() { + long taskMetricValue = getLongCounter("EcReconstructionTasks", rb); + return (taskMetricValue > 0); } - Thread.sleep(500); - retries++; - rb = getMetrics(workerDn.getMetrics().name()); - } + }, 500, 10000); + assertCounter("EcReconstructionTasks", (long) 1, rb); assertCounter("EcFailedReconstructionTasks", (long) 0, rb); } + @Test(timeout = 120000) + public void testEcCodingTime() throws Exception { + DataNode workerDn = doTest("/testEcCodingTime"); + MetricsRecordBuilder rb = getMetrics(workerDn.getMetrics().name()); + + // Ensure that reconstruction task is finished + GenericTestUtils.waitFor(new Supplier<Boolean>() { + @Override + public Boolean get() { + long taskMetricValue = getLongCounter("EcReconstructionTasks", rb); + return (taskMetricValue > 0); + } + }, 500, 10000); + + long decodeTime = getLongCounter("ecDecodingTimeNanos", rb); + Assert.assertTrue(decodeTime > 0); + } + private DataNode doTest(String fileName) throws Exception { Path file = new Path(fileName); --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org