Author: atm
Date: Fri Jun 8 06:57:52 2012
New Revision: 1347895
URL: http://svn.apache.org/viewvc?rev=1347895&view=rev
Log:
HDFS-2914. HA: Standby should not enter safemode when resources are low.
Contributed by Vinay.
Modified:
hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java
Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1347895&r1=1347894&r2=1347895&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Fri Jun 8 06:57:52 2012
@@ -300,6 +300,9 @@ Branch-2 ( Unreleased changes )
HDFS-3485. DataTransferThrottler will over-throttle when currentTimeMillis
jumps (Andy Isaacson via todd)
+ HDFS-2914. HA: Standby should not enter safemode when resources are low.
+ (Vinay via atm)
+
BREAKDOWN OF HDFS-3042 SUBTASKS
HDFS-2185. HDFS portion of ZK-based FailoverController (todd)
Modified:
hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL:
http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1347895&r1=1347894&r2=1347895&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Fri Jun 8 06:57:52 2012
@@ -572,8 +572,6 @@ public class FSNamesystem implements Nam
!safeMode.isPopulatingReplQueues();
setBlockTotal();
blockManager.activate(conf);
- this.nnrmthread = new Daemon(new NameNodeResourceMonitor());
- nnrmthread.start();
} finally {
writeUnlock();
}
@@ -590,7 +588,6 @@ public class FSNamesystem implements Nam
writeLock();
try {
if (blockManager != null) blockManager.close();
- if (nnrmthread != null) nnrmthread.interrupt();
} finally {
writeUnlock();
}
@@ -644,6 +641,10 @@ public class FSNamesystem implements Nam
}
leaseManager.startMonitor();
startSecretManagerIfNecessary();
+
+ //ResourceMonitor required only at ActiveNN. See HDFS-2914
+ this.nnrmthread = new Daemon(new NameNodeResourceMonitor());
+ nnrmthread.start();
} finally {
writeUnlock();
}
@@ -666,6 +667,10 @@ public class FSNamesystem implements Nam
if (leaseManager != null) {
leaseManager.stopMonitor();
}
+ if (nnrmthread != null) {
+ ((NameNodeResourceMonitor) nnrmthread.getRunnable()).stopMonitor();
+ nnrmthread.interrupt();
+ }
if (dir != null && dir.fsImage != null) {
if (dir.fsImage.editLog != null) {
dir.fsImage.editLog.close();
@@ -3193,10 +3198,11 @@ public class FSNamesystem implements Nam
* acceptable levels, this daemon will cause the NN to exit safe mode.
*/
class NameNodeResourceMonitor implements Runnable {
+ boolean shouldNNRmRun = true;
@Override
public void run () {
try {
- while (fsRunning) {
+ while (fsRunning && shouldNNRmRun) {
checkAvailableResources();
if(!nameNodeHasResourcesAvailable()) {
String lowResourcesMsg = "NameNode low on available disk space. ";
@@ -3217,7 +3223,11 @@ public class FSNamesystem implements Nam
FSNamesystem.LOG.error("Exception in NameNodeResourceMonitor: ", e);
}
}
- }
+
+ public void stopMonitor() {
+ shouldNNRmRun = false;
+ }
+ }
public FSImage getFSImage() {
return dir.fsImage;
Modified:
hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java
URL:
http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java?rev=1347895&r1=1347894&r2=1347895&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java Fri Jun 8 06:57:52 2012
@@ -17,6 +17,8 @@
*/
package org.apache.hadoop.hdfs.server.namenode.ha;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY;
import static org.junit.Assert.*;
import java.io.File;
@@ -127,6 +129,7 @@ public class TestFailureOfSharedDir {
@Test
public void testFailureOfSharedDir() throws Exception {
Configuration conf = new Configuration();
+ conf.setLong(DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY, 2000);
// The shared edits dir will automatically be marked required.
MiniDFSCluster cluster = null;
@@ -151,6 +154,15 @@ public class TestFailureOfSharedDir {
assertEquals(0, FileUtil.chmod(sharedEditsDir.getAbsolutePath(), "-w",
true));
+ Thread.sleep(conf.getLong(DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY,
+ DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT) * 2);
+
+ NameNode nn1 = cluster.getNameNode(1);
+ assertTrue(nn1.isStandbyState());
+ assertFalse(
+ "StandBy NameNode should not go to SafeMode on resource unavailability",
+ nn1.isInSafeMode());
+
NameNode nn0 = cluster.getNameNode(0);
nn0.getNamesystem().getFSImage().getEditLog().getJournalSet()
.setRuntimeForTesting(mockRuntime);