Author: rangadi
Date: Wed Mar 4 00:44:02 2009
New Revision: 749863
URL: http://svn.apache.org/viewvc?rev=749863&view=rev
Log:
HADOOP-4103. NameNode keeps a count of missing blocks. It warns on
WebUI if there are such blocks. '-report' and '-metaSave' have extra
info to track such blocks. (Raghu Angadi)
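For context, a minimal client-side sketch of how the new counters can be read
through DistributedFileSystem once this change is applied (illustration only;
the class name MissingBlocksCheck is made up and is not part of this commit):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.hdfs.DistributedFileSystem;

    public class MissingBlocksCheck {
      public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        if (fs instanceof DistributedFileSystem) {
          DistributedFileSystem dfs = (DistributedFileSystem) fs;
          // accessors added by HADOOP-4103
          System.out.println("Missing blocks: " + dfs.getMissingBlocksCount());
          System.out.println("Under replicated blocks: " + dfs.getUnderReplicatedBlocksCount());
          System.out.println("Blocks with corrupt replicas: " + dfs.getCorruptBlocksCount());
        }
        fs.close();
      }
    }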
Added:
hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/TestMissingBlocksAlert.java
Modified:
hadoop/core/trunk/CHANGES.txt
hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/DFSClient.java
hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/DistributedFileSystem.java
hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/protocol/ClientProtocol.java
hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/CorruptReplicasMap.java
hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/JspHelper.java
hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/UnderReplicatedBlocks.java
hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMetrics.java
hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/tools/DFSAdmin.java
hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/DFSTestUtil.java
hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/TestDatanodeBlockScanner.java
hadoop/core/trunk/src/webapps/hdfs/dfshealth.jsp
hadoop/core/trunk/src/webapps/static/hadoop.css
Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=749863&r1=749862&r2=749863&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Wed Mar 4 00:44:02 2009
@@ -330,6 +330,10 @@
HADOOP-4970. The full path name of a file is preserved inside Trash.
(Prasad Chakka via dhruba)
+ HADOOP-4103. NameNode keeps a count of missing blocks. It warns on
+ WebUI if there are such blocks. '-report' and '-metaSave' have extra
+ info to track such blocks. (Raghu Angadi)
+
NEW FEATURES
HADOOP-4575. Add a proxy service for relaying HsftpFileSystem requests.
Modified: hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/DFSClient.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/DFSClient.java?rev=749863&r1=749862&r2=749863&view=diff
==============================================================================
--- hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/DFSClient.java (original)
+++ hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/DFSClient.java Wed Mar 4 00:44:02 2009
@@ -700,6 +700,31 @@
return new FsStatus(rawNums[0], rawNums[1], rawNums[2]);
}
+ /**
+ * Returns count of blocks with no good replicas left. Normally should be
+ * zero.
+ * @throws IOException
+ */
+ public long getMissingBlocksCount() throws IOException {
+ return namenode.getStats()[ClientProtocol.GET_STATS_MISSING_BLOCKS_IDX];
+ }
+
+ /**
+ * Returns count of blocks with one or more replicas missing.
+ * @throws IOException
+ */
+ public long getUnderReplicatedBlocksCount() throws IOException {
+ return namenode.getStats()[ClientProtocol.GET_STATS_UNDER_REPLICATED_IDX];
+ }
+
+ /**
+ * Returns count of blocks with at least one replica marked corrupt.
+ * @throws IOException
+ */
+ public long getCorruptBlocksCount() throws IOException {
+ return namenode.getStats()[ClientProtocol.GET_STATS_CORRUPT_BLOCKS_IDX];
+ }
+
public DatanodeInfo[] datanodeReport(DatanodeReportType type)
throws IOException {
return namenode.getDatanodeReport(type);
Modified: hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/DistributedFileSystem.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/DistributedFileSystem.java?rev=749863&r1=749862&r2=749863&view=diff
==============================================================================
--- hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/DistributedFileSystem.java (original)
+++ hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/DistributedFileSystem.java Wed Mar 4 00:44:02 2009
@@ -24,6 +24,7 @@
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.FSConstants;
import org.apache.hadoop.hdfs.protocol.Block;
@@ -336,6 +337,34 @@
public long getRawUsed() throws IOException{
return dfs.getDiskStatus().getUsed();
}
+
+ /**
+ * Returns count of blocks with no good replicas left. Normally should be
+ * zero.
+ *
+ * @throws IOException
+ */
+ public long getMissingBlocksCount() throws IOException {
+ return dfs.getMissingBlocksCount();
+ }
+
+ /**
+ * Returns count of blocks with one or more replicas missing.
+ *
+ * @throws IOException
+ */
+ public long getUnderReplicatedBlocksCount() throws IOException {
+ return dfs.getUnderReplicatedBlocksCount();
+ }
+
+ /**
+ * Returns count of blocks with at least one replica marked corrupt.
+ *
+ * @throws IOException
+ */
+ public long getCorruptBlocksCount() throws IOException {
+ return dfs.getCorruptBlocksCount();
+ }
/** Return statistics for each datanode. */
public DatanodeInfo[] getDataNodeStats() throws IOException {
Modified: hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/protocol/ClientProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/protocol/ClientProtocol.java?rev=749863&r1=749862&r2=749863&view=diff
==============================================================================
--- hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/protocol/ClientProtocol.java (original)
+++ hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/protocol/ClientProtocol.java Wed Mar 4 00:44:02 2009
@@ -281,6 +281,13 @@
*/
public void renewLease(String clientName) throws IOException;
+ public int GET_STATS_CAPACITY_IDX = 0;
+ public int GET_STATS_USED_IDX = 1;
+ public int GET_STATS_REMAINING_IDX = 2;
+ public int GET_STATS_UNDER_REPLICATED_IDX = 3;
+ public int GET_STATS_CORRUPT_BLOCKS_IDX = 4;
+ public int GET_STATS_MISSING_BLOCKS_IDX = 5;
+
/**
* Get a set of statistics about the filesystem.
* Right now, only three values are returned.
@@ -288,7 +295,12 @@
* <li> [0] contains the total storage capacity of the system, in bytes.</li>
* <li> [1] contains the total used space of the system, in bytes.</li>
* <li> [2] contains the available storage of the system, in bytes.</li>
+ * <li> [3] contains number of under replicated blocks in the system.</li>
+ * <li> [4] contains number of blocks with a corrupt replica. </li>
+ * <li> [5] contains number of blocks without any good replicas left. </li>
* </ul>
+ * Use public constants like {@link #GET_STATS_CAPACITY_IDX} in place of
+ * actual numbers to index into the array.
*/
public long[] getStats() throws IOException;
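As a quick illustration (not part of the patch), a caller that already holds a
ClientProtocol proxy -- here assumed to be named 'namenode' -- would index the
returned array with these constants instead of bare numbers:

    long[] stats = namenode.getStats();
    long capacity  = stats[ClientProtocol.GET_STATS_CAPACITY_IDX];
    long underRepl = stats[ClientProtocol.GET_STATS_UNDER_REPLICATED_IDX];
    long corrupt   = stats[ClientProtocol.GET_STATS_CORRUPT_BLOCKS_IDX];
    long missing   = stats[ClientProtocol.GET_STATS_MISSING_BLOCKS_IDX];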
Modified: hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/CorruptReplicasMap.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/CorruptReplicasMap.java?rev=749863&r1=749862&r2=749863&view=diff
==============================================================================
--- hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/CorruptReplicasMap.java (original)
+++ hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/CorruptReplicasMap.java Wed Mar 4 00:44:02 2009
@@ -130,4 +130,8 @@
Collection<DatanodeDescriptor> nodes = getNodes(blk);
return (nodes == null) ? 0 : nodes.size();
}
+
+ public int size() {
+ return corruptReplicasMap.size();
+ }
}
Modified: hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=749863&r1=749862&r2=749863&view=diff
==============================================================================
--- hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Wed Mar 4 00:44:02 2009
@@ -123,7 +123,7 @@
private FSNamesystemMetrics myFSMetrics;
private long capacityTotal = 0L, capacityUsed = 0L, capacityRemaining = 0L;
private int totalLoad = 0;
- private long pendingReplicationBlocksCount = 0L,
+ private long pendingReplicationBlocksCount = 0L, corruptReplicaBlocksCount,
underReplicatedBlocksCount = 0L, scheduledReplicationBlocksCount = 0L;
//
@@ -241,6 +241,8 @@
* Last block index used for replication work.
*/
private int replIndex = 0;
+ private long missingBlocksInCurIter = 0;
+ private long missingBlocksInPrevIter = 0;
private static FSNamesystem fsNamesystemObject;
private SafeModeInfo safeMode; // safe mode information
@@ -485,7 +487,7 @@
/**
* Dump all metadata into specified file
*/
- void metaSave(String filename) throws IOException {
+ synchronized void metaSave(String filename) throws IOException {
checkSuperuserPrivilege();
File file = new File(System.getProperty("hadoop.log.dir"),
filename);
@@ -500,7 +502,21 @@
out.println("Metasave: Blocks waiting for replication: " +
neededReplications.size());
for (Block block : neededReplications) {
- out.print(block);
+ List<DatanodeDescriptor> containingNodes =
+ new ArrayList<DatanodeDescriptor>();
+ NumberReplicas numReplicas = new NumberReplicas();
+ // source node returned is not used
+ chooseSourceDatanode(block, containingNodes, numReplicas);
+ int usableReplicas = numReplicas.liveReplicas() +
+ numReplicas.decommissionedReplicas();
+ // l: == live, d: == decommissioned, c: == corrupt, e: == excess
+ out.print(block + " (replicas:" +
+ " l: " + numReplicas.liveReplicas() +
+ " d: " + numReplicas.decommissionedReplicas() +
+ " c: " + numReplicas.corruptReplicas() +
+ " e: " + numReplicas.excessReplicas() +
+ ((usableReplicas > 0)? "" : " MISSING") + ")");
+
for (Iterator<DatanodeDescriptor> jt = blocksMap.nodeIterator(block);
jt.hasNext();) {
DatanodeDescriptor node = jt.next();
@@ -2314,9 +2330,12 @@
workFound = computeReplicationWork(blocksToProcess);
// Update FSNamesystemMetrics counters
- pendingReplicationBlocksCount = pendingReplications.size();
- underReplicatedBlocksCount = neededReplications.size();
- scheduledReplicationBlocksCount = workFound;
+ synchronized (this) {
+ pendingReplicationBlocksCount = pendingReplications.size();
+ underReplicatedBlocksCount = neededReplications.size();
+ scheduledReplicationBlocksCount = workFound;
+ corruptReplicaBlocksCount = corruptReplicas.size();
+ }
if(workFound == 0)
workFound = computeInvalidateWork(nodesToProcess);
@@ -2372,6 +2391,10 @@
int scheduledReplicationCount = 0;
synchronized(neededReplications) {
+ if (neededReplications.size() == 0) {
+ missingBlocksInCurIter = 0;
+ missingBlocksInPrevIter = 0;
+ }
// # of blocks to process equals either twice the number of live
// data-nodes or the number of under-replicated blocks whichever is less
blocksToProcess = Math.min(blocksToProcess, neededReplications.size());
@@ -2390,6 +2413,8 @@
if( ! neededReplicationsIterator.hasNext()) {
// start from the beginning
replIndex = 0;
+ missingBlocksInPrevIter = missingBlocksInCurIter;
+ missingBlocksInCurIter = 0;
blocksToProcess = Math.min(blocksToProcess,
neededReplications.size());
if(blkCnt >= blocksToProcess)
break;
@@ -2416,6 +2441,11 @@
NumberReplicas numReplicas = new NumberReplicas();
DatanodeDescriptor srcNode =
chooseSourceDatanode(block, containingNodes, numReplicas);
+
+ if ((numReplicas.liveReplicas() + numReplicas.decommissionedReplicas())
+ <= 0) {
+ missingBlocksInCurIter++;
+ }
if(srcNode == null) // block can not be replicated from any node
continue;
@@ -3283,10 +3313,18 @@
addStoredBlock(block, node, delHintNode );
}
+ public long getMissingBlocksCount() {
+ // not locking
+ return Math.max(missingBlocksInPrevIter, missingBlocksInCurIter);
+ }
+
long[] getStats() {
synchronized(heartbeats) {
- return new long[]
- {this.capacityTotal, this.capacityUsed, this.capacityRemaining};
+ return new long[] {this.capacityTotal, this.capacityUsed,
+ this.capacityRemaining,
+ this.underReplicatedBlocksCount,
+ this.corruptReplicaBlocksCount,
+ getMissingBlocksCount()};
}
}
@@ -4420,6 +4458,11 @@
return underReplicatedBlocksCount;
}
+ /** Returns number of blocks with corrupt replicas */
+ public long getCorruptReplicaBlocksCount() {
+ return corruptReplicaBlocksCount;
+ }
+
public long getScheduledReplicationBlocks() {
return scheduledReplicationBlocksCount;
}
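A note on the counting scheme above: missingBlocksInCurIter accumulates while
the current sweep over neededReplications is still in progress, and
missingBlocksInPrevIter holds the total from the last completed sweep.
Reporting the larger of the two means a sweep that has just restarted does not
briefly report zero missing blocks. Rough shape of the pattern (restated from
the patch, simplified):

    // at the start of each new sweep over neededReplications
    missingBlocksInPrevIter = missingBlocksInCurIter;
    missingBlocksInCurIter = 0;

    // reported value; read without taking the FSNamesystem lock
    return Math.max(missingBlocksInPrevIter, missingBlocksInCurIter);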
Modified: hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/JspHelper.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/JspHelper.java?rev=749863&r1=749862&r2=749863&view=diff
==============================================================================
--- hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/JspHelper.java (original)
+++ hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/JspHelper.java Wed Mar 4 00:44:02 2009
@@ -180,6 +180,17 @@
return "Safe mode is ON. <em>" + fsn.getSafeModeTip() + "</em><br>";
}
+ public static String getWarningText(FSNamesystem fsn) {
+ // Ideally this should be displayed in RED
+ long missingBlocks = fsn.getMissingBlocksCount();
+ if (missingBlocks > 0) {
+ return "<br> WARNING :" +
+ " There are about " + missingBlocks +
+ " missing blocks. Please check the log or run fsck. <br><br>";
+ }
+ return "";
+ }
+
public static String getInodeLimitText(FSNamesystem fsn) {
long inodes = fsn.dir.totalInodes();
long blocks = fsn.getBlocksTotal();
Modified: hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/UnderReplicatedBlocks.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/UnderReplicatedBlocks.java?rev=749863&r1=749862&r2=749863&view=diff
==============================================================================
--- hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/UnderReplicatedBlocks.java (original)
+++ hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/UnderReplicatedBlocks.java Wed Mar 4 00:44:02 2009
@@ -79,7 +79,7 @@
if (decommissionedReplicas > 0) {
return 0;
}
- return LEVEL; // no need to replicate
+ return 2; // keep these blocks in needed replication.
} else if(curReplicas==1) {
return 0; // highest priority
} else if(curReplicas*3<expectedReplicas) {
Modified: hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMetrics.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMetrics.java?rev=749863&r1=749862&r2=749863&view=diff
==============================================================================
--- hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMetrics.java (original)
+++ hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMetrics.java Wed Mar 4 00:44:02 2009
@@ -55,6 +55,7 @@
private final MetricsIntValue pendingReplicationBlocks = new MetricsIntValue("PendingReplicationBlocks", registry);
private final MetricsIntValue underReplicatedBlocks = new MetricsIntValue("UnderReplicatedBlocks", registry);
private final MetricsIntValue scheduledReplicationBlocks = new MetricsIntValue("ScheduledReplicationBlocks", registry);
+ private final MetricsIntValue missingBlocks = new MetricsIntValue("MissingBlocks", registry);
private final FSNamesystem fsNameSystem;
@@ -107,6 +108,7 @@
underReplicatedBlocks.set((int)fsNameSystem.getUnderReplicatedBlocks());
scheduledReplicationBlocks.set((int)fsNameSystem.
getScheduledReplicationBlocks());
+ missingBlocks.set((int)fsNameSystem.getMissingBlocksCount());
for (MetricsBase m : registry.getMetricsList()) {
m.pushMetric(metricsRecord);
Modified: hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/tools/DFSAdmin.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/tools/DFSAdmin.java?rev=749863&r1=749862&r2=749863&view=diff
==============================================================================
--- hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/tools/DFSAdmin.java (original)
+++ hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/tools/DFSAdmin.java Wed Mar 4 00:44:02 2009
@@ -286,6 +286,19 @@
System.out.println("DFS Used%: "
+ StringUtils.limitDecimalTo2(((1.0 * used) /
presentCapacity) * 100)
+ "%");
+
+ /* These counts are not always up to date. They are updated after an
+ * iteration over an internal list, so they should catch up within a few
+ * seconds to minutes. Use "-metaSave" to get a list of all such blocks
+ * and accurate counts.
+ */
+ System.out.println("Under replicated blocks: " +
+ dfs.getUnderReplicatedBlocksCount());
+ System.out.println("Blocks with corrupt replicas: " +
+ dfs.getCorruptBlocksCount());
+ System.out.println("Missing blocks: " +
+ dfs.getMissingBlocksCount());
+
System.out.println();
System.out.println("-------------------------------------------------");
Modified: hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/DFSTestUtil.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/DFSTestUtil.java?rev=749863&r1=749862&r2=749863&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/DFSTestUtil.java (original)
+++ hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/DFSTestUtil.java Wed Mar 4 00:44:02 2009
@@ -19,9 +19,12 @@
package org.apache.hadoop.hdfs;
import java.io.BufferedReader;
+import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
+import java.net.URL;
+import java.net.URLConnection;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
@@ -259,6 +262,14 @@
return b.toString();
}
+ // Returns url content as string.
+ public static String urlGet(URL url) throws IOException {
+ URLConnection conn = url.openConnection();
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ IOUtils.copyBytes(conn.getInputStream(), out, 4096, true);
+ return out.toString();
+ }
+
static public Configuration
getConfigurationWithDifferentUsername(Configuration conf
) throws IOException {
final Configuration c = new Configuration(conf);
Modified: hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/TestDatanodeBlockScanner.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/TestDatanodeBlockScanner.java?rev=749863&r1=749862&r2=749863&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/TestDatanodeBlockScanner.java (original)
+++ hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/TestDatanodeBlockScanner.java Wed Mar 4 00:44:02 2009
@@ -18,11 +18,9 @@
package org.apache.hadoop.hdfs;
-import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.URL;
-import java.net.URLConnection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.io.*;
@@ -50,19 +48,6 @@
private static final Log LOG =
LogFactory.getLog(TestDatanodeBlockScanner.class);
- private static String urlGet(URL url) {
- try {
- URLConnection conn = url.openConnection();
- ByteArrayOutputStream out = new ByteArrayOutputStream();
- IOUtils.copyBytes(conn.getInputStream(), out, 4096, true);
- return out.toString();
- } catch (IOException e) {
- LOG.warn("Failed to fetch " + url.toString() + " : " +
- e.getMessage());
- }
- return "";
- }
-
private static Pattern pattern =
Pattern.compile(".*?(blk_[-]*\\d+).*?scan time\\s*:\\s*(\\d+)");
/**
@@ -79,7 +64,7 @@
String block = DFSTestUtil.getFirstBlock(fs, file).getBlockName();
while (verificationTime <= 0) {
- String response = urlGet(url);
+ String response = DFSTestUtil.urlGet(url);
for(Matcher matcher = pattern.matcher(response); matcher.find();) {
if (block.equals(matcher.group(1))) {
verificationTime = Long.parseLong(matcher.group(2));
Added: hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/TestMissingBlocksAlert.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/TestMissingBlocksAlert.java?rev=749863&view=auto
==============================================================================
--- hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/TestMissingBlocksAlert.java (added)
+++ hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/TestMissingBlocksAlert.java Wed Mar 4 00:44:02 2009
@@ -0,0 +1,119 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs;
+
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.net.URL;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.ChecksumException;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import junit.framework.TestCase;
+
+/**
+ * The test makes sure that the NameNode detects the presence of blocks that
+ * have no valid replicas. In addition, it verifies that the HDFS front page
+ * displays a warning in such a case.
+ */
+public class TestMissingBlocksAlert extends TestCase {
+
+ private static final Log LOG =
+ LogFactory.getLog(TestMissingBlocksAlert.class);
+
+ public void testMissingBlocksAlert() throws IOException,
+ InterruptedException {
+
+ MiniDFSCluster cluster = null;
+
+ try {
+ Configuration conf = new Configuration();
+ //minimize test delay
+ conf.setInt("dfs.replication.interval", 0);
+ int fileLen = 10*1024;
+
+ //start a cluster with single datanode
+ cluster = new MiniDFSCluster(conf, 1, true, null);
+ cluster.waitActive();
+
+ DistributedFileSystem dfs =
+ (DistributedFileSystem) cluster.getFileSystem();
+
+ // create a normal file
+ DFSTestUtil.createFile(dfs, new Path("/testMissingBlocksAlert/file1"),
+ fileLen, (short)3, 0);
+
+ Path corruptFile = new Path("/testMissingBlocks/corruptFile");
+ DFSTestUtil.createFile(dfs, corruptFile, fileLen, (short)3, 0);
+
+
+ // Corrupt the block
+ String block = DFSTestUtil.getFirstBlock(dfs, corruptFile).getBlockName();
+ TestDatanodeBlockScanner.corruptReplica(block, 0);
+
+ // read the file so that the corrupt block is reported to NN
+ FSDataInputStream in = dfs.open(corruptFile);
+ try {
+ in.readFully(new byte[fileLen]);
+ } catch (ChecksumException ignored) { // checksum error is expected.
+ }
+ in.close();
+
+ LOG.info("Waiting for missing blocks count to increase...");
+
+ while (dfs.getMissingBlocksCount() <= 0) {
+ Thread.sleep(100);
+ }
+ assertTrue(dfs.getMissingBlocksCount() == 1);
+
+
+ // Now verify that it shows up on webui
+ URL url = new URL("http://" + conf.get("dfs.http.address") +
+ "/dfshealth.jsp");
+ String dfsFrontPage = DFSTestUtil.urlGet(url);
+ String warnStr = "WARNING : There are about ";
+ assertTrue("HDFS Front page does not contain expected warning",
+ dfsFrontPage.contains(warnStr + "1 missing blocks"));
+
+ // now do the reverse: remove the file and expect the number of missing
+ // blocks to go to zero
+
+ dfs.delete(corruptFile, true);
+
+ LOG.info("Waiting for missing blocks count to be zero...");
+ while (dfs.getMissingBlocksCount() > 0) {
+ Thread.sleep(100);
+ }
+
+ // and make sure the WARNING disappears
+ // by fetching the front page again
+ dfsFrontPage = DFSTestUtil.urlGet(url);
+ assertFalse("HDFS Front page contains unexpected warning",
+ dfsFrontPage.contains(warnStr));
+ } finally {
+ if (cluster != null) {
+ cluster.shutdown();
+ }
+ }
+ }
+}
Modified: hadoop/core/trunk/src/webapps/hdfs/dfshealth.jsp
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/webapps/hdfs/dfshealth.jsp?rev=749863&r1=749862&r2=749863&view=diff
==============================================================================
--- hadoop/core/trunk/src/webapps/hdfs/dfshealth.jsp (original)
+++ hadoop/core/trunk/src/webapps/hdfs/dfshealth.jsp Wed Mar 4 00:44:02 2009
@@ -264,6 +264,8 @@
<h3>Cluster Summary</h3>
<b> <%= JspHelper.getSafeModeText(fsn)%> </b>
<b> <%= JspHelper.getInodeLimitText(fsn)%> </b>
+<a class="warning"> <%= JspHelper.getWarningText(fsn)%></a>
+
<%
generateDFSHealthReport(out, nn, request);
%>
Modified: hadoop/core/trunk/src/webapps/static/hadoop.css
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/webapps/static/hadoop.css?rev=749863&r1=749862&r2=749863&view=diff
==============================================================================
--- hadoop/core/trunk/src/webapps/static/hadoop.css (original)
+++ hadoop/core/trunk/src/webapps/static/hadoop.css Wed Mar 4 00:44:02 2009
@@ -76,6 +76,11 @@
background-color : lightyellow;
}
+.warning {
+ font-weight : bolder;
+ color : red;
+}
+
div#dfstable table {
white-space : pre;
}