Author: jing9
Date: Fri Aug 30 07:44:35 2013
New Revision: 1518903

URL: http://svn.apache.org/r1518903
Log:
HDFS-5140. Merge change r1518902 from branch-2.

Modified:
    hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
    hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

Modified: hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1518903&r1=1518902&r2=1518903&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Fri Aug 30 07:44:35 2013
@@ -120,6 +120,9 @@ Release 2.1.1-beta - UNRELEASED
     HDFS-5077. NPE in FSNamesystem.commitBlockSynchronization().
     (Plamen Jeliazkov via shv)
 
+    HDFS-5140. Too many safemode monitor threads being created in the standby
+    namenode causing it to fail with out of memory error. (jing9)
+
 Release 2.1.0-beta - 2013-08-22
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1518903&r1=1518902&r2=1518903&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Fri Aug 30 07:44:35 2013
@@ -349,7 +349,7 @@ public class FSNamesystem implements Nam
 
   final LeaseManager leaseManager = new LeaseManager(this);
 
-  Daemon smmthread = null; // SafeModeMonitor thread
+  volatile Daemon smmthread = null; // SafeModeMonitor thread
 
   Daemon nnrmthread = null; // NamenodeResourceMonitor thread
 
@@ -4508,7 +4508,9 @@ public class FSNamesystem implements Nam
       // Have to have write-lock since leaving safemode initializes
       // repl queues, which requires write lock
       assert hasWriteLock();
-      if (needEnter()) {
+      // if smmthread is already running, the block threshold must have been
+      // reached before, there is no need to enter the safe mode again
+      if (smmthread == null && needEnter()) {
         enter();
         // check if we are ready to initialize replication queues
         if (canInitializeReplQueues() && !isPopulatingReplQueues()) {
@@ -4517,7 +4519,7 @@ public class FSNamesystem implements Nam
         reportStatus("STATE* Safe mode ON.", false);
         return;
       }
-      // the threshold is reached
+      // the threshold is reached or was reached before
       if (!isOn() || // safe mode is off
           extension <= 0 || threshold <= 0) { // don't need to wait
         this.leave(); // leave safe mode
@@ -4529,9 +4531,11 @@ public class FSNamesystem implements Nam
       }
       // start monitor
       reached = now();
-      smmthread = new Daemon(new SafeModeMonitor());
-      smmthread.start();
-      reportStatus("STATE* Safe mode extension entered.", true);
+      if (smmthread == null) {
+        smmthread = new Daemon(new SafeModeMonitor());
+        smmthread.start();
+        reportStatus("STATE* Safe mode extension entered.", true);
+      }
 
       // check if we are ready to initialize replication queues
       if (canInitializeReplQueues() && !isPopulatingReplQueues()) {
@@ -4767,6 +4771,7 @@ public class FSNamesystem implements Nam
           if (safeMode.canLeave()) {
             // Leave safe mode.
             safeMode.leave();
+            smmthread = null;
             break;
           }
         } finally {
@@ -4782,7 +4787,6 @@ public class FSNamesystem implements Nam
       if (!fsRunning) {
         LOG.info("NameNode is being shutdown, exit SafeModeMonitor thread");
       }
-      smmthread = null;
     }
   }
 