This is an automated email from the ASF dual-hosted git repository.

weichiu pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/hadoop.git
commit 4891f24a94a7a2a293dc379d23e93907bbb40526
Author: Ayush Saxena <ayushsax...@apache.org>
AuthorDate: Sun Feb 9 23:19:40 2020 +0530

    HDFS-15158. The number of failed volumes mismatch with volumeFailures of Datanode metrics. Contributed by Yang Yun.

    (cherry picked from commit 6191d4b4a0919863fda78e549ab6c60022e3ebc2)
    (cherry picked from commit 1e3b0df6abcc1252907c41aaedb3e7e257bce497)
---
 .../hadoop/hdfs/server/datanode/DataNode.java      | 12 +++++-----
 .../server/datanode/metrics/DataNodeMetrics.java   |  6 ++---
 .../server/datanode/TestDataNodeVolumeFailure.java | 26 ++++++++++++++++++++++
 3 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
index 4cb2d93..62e4262 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
@@ -2192,7 +2192,7 @@ public class DataNode extends ReconfigurableBase
     });
   }
 
-  private void handleDiskError(String failedVolumes) {
+  private void handleDiskError(String failedVolumes, int failedNumber) {
     final boolean hasEnoughResources = data.hasEnoughResource();
     LOG.warn("DataNode.handleDiskError on: " +
         "[{}] Keep Running: {}", failedVolumes, hasEnoughResources);
@@ -2201,7 +2201,7 @@ public class DataNode extends ReconfigurableBase
     // shutdown the DN completely.
     int dpError = hasEnoughResources ? DatanodeProtocol.DISK_ERROR :
                                        DatanodeProtocol.FATAL_DISK_ERROR;
-    metrics.incrVolumeFailures();
+    metrics.incrVolumeFailures(failedNumber);
 
     //inform NameNodes
     for(BPOfferService bpos: blockPoolManager.getAllNamenodeThreads()) {
@@ -3408,8 +3408,8 @@ public class DataNode extends ReconfigurableBase
     }
 
     data.handleVolumeFailures(unhealthyVolumes);
-    Set<StorageLocation> unhealthyLocations = new HashSet<>(
-        unhealthyVolumes.size());
+    int failedNumber = unhealthyVolumes.size();
+    Set<StorageLocation> unhealthyLocations = new HashSet<>(failedNumber);
 
     StringBuilder sb = new StringBuilder("DataNode failed volumes:");
     for (FsVolumeSpi vol : unhealthyVolumes) {
@@ -3424,8 +3424,8 @@ public class DataNode extends ReconfigurableBase
       LOG.warn("Error occurred when removing unhealthy storage dirs", e);
     }
     LOG.debug("{}", sb);
-    // send blockreport regarding volume failure
-    handleDiskError(sb.toString());
+    // send blockreport regarding volume failure
+    handleDiskError(sb.toString(), failedNumber);
   }
 
   /**
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java
index 58a2f65..00590ac 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java
@@ -370,9 +370,9 @@ public class DataNodeMetrics {
       remoteBytesRead.incr(size);
     }
   }
-
-  public void incrVolumeFailures() {
-    volumeFailures.incr();
+
+  public void incrVolumeFailures(int size) {
+    volumeFailures.incr(size);
   }
 
   public void incrDatanodeNetworkErrors() {
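The crux of the change is visible in the hunks above: a single disk-check pass that detected several failed volumes used to call incrVolumeFailures() once, so the VolumeFailures counter could trail the actual number of failed volumes; the fix threads unhealthyVolumes.size() through handleDiskError into the counter. Below is a minimal standalone sketch of the before/after counting semantics, built directly on Hadoop's MutableCounterLong; the class, registry, and method names here are illustrative, not part of the patch.

import org.apache.hadoop.metrics2.lib.MetricsRegistry;
import org.apache.hadoop.metrics2.lib.MutableCounterLong;

public class VolumeFailureCounterSketch {
  // Illustrative registry/metric names; DataNodeMetrics defines its own.
  private final MetricsRegistry registry = new MetricsRegistry("sketch");
  private final MutableCounterLong volumeFailures =
      registry.newCounter("VolumeFailures", "Failed volume count", 0L);

  // Old behavior: one increment per disk-check pass, however many
  // volumes actually failed in that pass.
  void onDiskErrorOld(int failedVolumeCount) {
    volumeFailures.incr();
  }

  // Fixed behavior: advance the counter by the failed-volume count,
  // mirroring the new incrVolumeFailures(int) above.
  void onDiskErrorNew(int failedVolumeCount) {
    volumeFailures.incr(failedVolumeCount);
  }

  public static void main(String[] args) {
    VolumeFailureCounterSketch s = new VolumeFailureCounterSketch();
    s.onDiskErrorOld(2);  // counter becomes 1: under-counts by one
    s.onDiskErrorNew(2);  // counter becomes 3: tracks both failures
    System.out.println(s.volumeFailures.value());  // prints 3
  }
}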
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
index 4b4002b..2508eef 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hdfs.server.datanode;
 
+import static org.apache.hadoop.test.MetricsAsserts.getLongCounter;
+import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
 import static org.apache.hadoop.test.PlatformAssumptions.assumeNotWindows;
 import static org.hamcrest.core.Is.is;
 import static org.junit.Assert.assertEquals;
@@ -77,6 +79,7 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
 import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
 import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.metrics2.MetricsRecordBuilder;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.test.GenericTestUtils;
@@ -945,4 +948,27 @@ public class TestDataNodeVolumeFailure {
       }
     }, 10, 30 * 1000);
   }
+
+  /*
+   * Fail two volumes, and check the metrics of VolumeFailures
+   */
+  @Test
+  public void testVolumeFailureTwo() throws Exception {
+    // fail two volumes
+    data_fail = cluster.getInstanceStorageDir(1, 0);
+    failedDir = MiniDFSCluster.getFinalizedDir(data_fail,
+        cluster.getNamesystem().getBlockPoolId());
+    failedDir.setReadOnly();
+    data_fail = cluster.getInstanceStorageDir(1, 1);
+    failedDir = MiniDFSCluster.getFinalizedDir(data_fail,
+        cluster.getNamesystem().getBlockPoolId());
+    failedDir.setReadOnly();
+
+    final DataNode dn = cluster.getDataNodes().get(1);
+    dn.checkDiskError();
+
+    MetricsRecordBuilder rb = getMetrics(dn.getMetrics().name());
+    long volumeFailures = getLongCounter("VolumeFailures", rb);
+    assertEquals(2, volumeFailures);
+  }
 }
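The new test exercises the end-to-end path: it makes two of the DataNode's finalized directories read-only, triggers checkDiskError(), and asserts the counter advanced by two. The counter contract can also be checked in isolation; below is a minimal sketch, not part of the patch, assuming only APIs the patch itself relies on (DataNodeMetrics.create registers the source with the default metrics system, which is what MetricsAsserts.getMetrics(name) reads, and "sketchDn" is a hypothetical datanode name).

package org.apache.hadoop.hdfs.server.datanode;

import static org.apache.hadoop.test.MetricsAsserts.getLongCounter;
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
import static org.junit.Assert.assertEquals;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics;
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.junit.Test;

public class TestVolumeFailuresCounterSketch {
  @Test
  public void testCounterAdvancesByFailedVolumeCount() {
    // Hypothetical datanode name; create() registers this source with the
    // default metrics system so getMetrics(name) can find it.
    DataNodeMetrics metrics =
        DataNodeMetrics.create(new Configuration(), "sketchDn");
    try {
      metrics.incrVolumeFailures(2);  // two volumes fail in one pass
      metrics.incrVolumeFailures(1);  // a third fails in a later pass
      MetricsRecordBuilder rb = getMetrics(metrics.name());
      assertEquals(3, getLongCounter("VolumeFailures", rb));
    } finally {
      metrics.shutdown();
    }
  }
}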