Author: atm
Date: Fri Jun 8 07:04:58 2012
New Revision: 1347898
URL: http://svn.apache.org/viewvc?rev=1347898&view=rev
Log:
HDFS-2914. HA: Standby should not enter safemode when resources are low.
Contributed by Vinay.
Modified:
hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java
Modified:
hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1347898&r1=1347897&r2=1347898&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Fri Jun 8 07:04:58 2012
@@ -165,6 +165,8 @@ Release 2.0.1-alpha - UNRELEASED
HDFS-3485. DataTransferThrottler will over-throttle when currentTimeMillis
jumps (Andy Isaacson via todd)
+ HDFS-2914. HA: Standby should not enter safemode when resources are low. (Vinay via atm)
+
BREAKDOWN OF HDFS-3042 SUBTASKS
HDFS-2185. HDFS portion of ZK-based FailoverController (todd)
Modified:
hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1347898&r1=1347897&r2=1347898&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Fri Jun 8 07:04:58 2012
@@ -557,8 +557,6 @@ public class FSNamesystem implements Nam
!safeMode.isPopulatingReplQueues();
setBlockTotal();
blockManager.activate(conf);
- this.nnrmthread = new Daemon(new NameNodeResourceMonitor());
- nnrmthread.start();
} finally {
writeUnlock();
}
@@ -575,7 +573,6 @@ public class FSNamesystem implements Nam
writeLock();
try {
if (blockManager != null) blockManager.close();
- if (nnrmthread != null) nnrmthread.interrupt();
} finally {
writeUnlock();
}
@@ -629,6 +626,10 @@ public class FSNamesystem implements Nam
}
leaseManager.startMonitor();
startSecretManagerIfNecessary();
+
+ //ResourceMonitor required only at ActiveNN. See HDFS-2914
+ this.nnrmthread = new Daemon(new NameNodeResourceMonitor());
+ nnrmthread.start();
} finally {
writeUnlock();
}
@@ -651,6 +652,10 @@ public class FSNamesystem implements Nam
if (leaseManager != null) {
leaseManager.stopMonitor();
}
+ if (nnrmthread != null) {
+ ((NameNodeResourceMonitor) nnrmthread.getRunnable()).stopMonitor();
+ nnrmthread.interrupt();
+ }
if (dir != null && dir.fsImage != null) {
if (dir.fsImage.editLog != null) {
dir.fsImage.editLog.close();
@@ -3178,10 +3183,11 @@ public class FSNamesystem implements Nam
* acceptable levels, this daemon will cause the NN to exit safe mode.
*/
class NameNodeResourceMonitor implements Runnable {
+ boolean shouldNNRmRun = true;
@Override
public void run () {
try {
- while (fsRunning) {
+ while (fsRunning && shouldNNRmRun) {
checkAvailableResources();
if(!nameNodeHasResourcesAvailable()) {
String lowResourcesMsg = "NameNode low on available disk space. ";
@@ -3202,7 +3208,11 @@ public class FSNamesystem implements Nam
FSNamesystem.LOG.error("Exception in NameNodeResourceMonitor: ", e);
}
}
- }
+
+ public void stopMonitor() {
+ shouldNNRmRun = false;
+ }
+ }
public FSImage getFSImage() {
return dir.fsImage;
Modified:
hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java?rev=1347898&r1=1347897&r2=1347898&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java Fri Jun 8 07:04:58 2012
@@ -17,6 +17,8 @@
*/
package org.apache.hadoop.hdfs.server.namenode.ha;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY;
import static org.junit.Assert.*;
import java.io.File;
@@ -127,6 +129,7 @@ public class TestFailureOfSharedDir {
@Test
public void testFailureOfSharedDir() throws Exception {
Configuration conf = new Configuration();
+ conf.setLong(DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY, 2000);
// The shared edits dir will automatically be marked required.
MiniDFSCluster cluster = null;
@@ -151,6 +154,15 @@ public class TestFailureOfSharedDir {
assertEquals(0, FileUtil.chmod(sharedEditsDir.getAbsolutePath(), "-w",
true));
+ Thread.sleep(conf.getLong(DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY,
+ DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT) * 2);
+
+ NameNode nn1 = cluster.getNameNode(1);
+ assertTrue(nn1.isStandbyState());
+ assertFalse(
+ "StandBy NameNode should not go to SafeMode on resource unavailability",
+ nn1.isInSafeMode());
+
NameNode nn0 = cluster.getNameNode(0);
nn0.getNamesystem().getFSImage().getEditLog().getJournalSet()
.setRuntimeForTesting(mockRuntime);