Author: yhemanth
Date: Wed Mar 18 13:19:07 2009
New Revision: 755589
URL: http://svn.apache.org/viewvc?rev=755589&view=rev
Log:
HADOOP-5516. Fix NullPointerException in TaskMemoryManagerThread that comes
when monitored processes disappear when the thread is running. Contributed by
Vinod Kumar Vavilapalli.
Modified:
hadoop/core/trunk/CHANGES.txt
hadoop/core/trunk/src/core/org/apache/hadoop/util/ProcfsBasedProcessTree.java
hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskMemoryManagerThread.java
hadoop/core/trunk/src/test/org/apache/hadoop/util/TestProcfsBasedProcessTree.java
Modified: hadoop/core/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=755589&r1=755588&r2=755589&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Wed Mar 18 13:19:07 2009
@@ -1025,6 +1025,10 @@
HADOOP-5514. Fix JobTracker metrics and add metrics for wating, failed
tasks. (cdouglas)
+ HADOOP-5516. Fix NullPointerException in TaskMemoryManagerThread that comes when
+ monitored processes disappear when the thread is running.
+ (Vinod Kumar Vavilapalli via yhemanth)
+
Release 0.19.2 - Unreleased
BUG FIXES
Modified: hadoop/core/trunk/src/core/org/apache/hadoop/util/ProcfsBasedProcessTree.java
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/core/org/apache/hadoop/util/ProcfsBasedProcessTree.java?rev=755589&r1=755588&r2=755589&view=diff
==============================================================================
--- hadoop/core/trunk/src/core/org/apache/hadoop/util/ProcfsBasedProcessTree.java (original)
+++ hadoop/core/trunk/src/core/org/apache/hadoop/util/ProcfsBasedProcessTree.java Wed Mar 18 13:19:07 2009
@@ -97,7 +97,8 @@
}
/**
- * Get the process-tree with latest state.
+ * Get the process-tree with latest state. If the root-process is not alive,
+ * an empty tree will be returned.
*
* @return the process-tree with latest state.
*/
@@ -113,14 +114,19 @@
for (Integer proc : processList) {
// Get information for each process
ProcessInfo pInfo = new ProcessInfo(proc);
- constructProcessInfo(pInfo);
- allProcessInfo.put(proc, pInfo);
- if (proc.equals(this.pid)) {
- me = pInfo; // cache 'me'
- processTree.put(proc, pInfo);
+ if (constructProcessInfo(pInfo) != null) {
+ allProcessInfo.put(proc, pInfo);
+ if (proc.equals(this.pid)) {
+ me = pInfo; // cache 'me'
+ processTree.put(proc, pInfo);
+ }
}
}
+ if (me == null) {
+ return this;
+ }
+
// Add each process to its parent.
for (Map.Entry<Integer, ProcessInfo> entry : allProcessInfo.entrySet()) {
Integer pID = entry.getKey();
Modified: hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskMemoryManagerThread.java
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskMemoryManagerThread.java?rev=755589&r1=755588&r2=755589&view=diff
==============================================================================
--- hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskMemoryManagerThread.java (original)
+++ hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskMemoryManagerThread.java Wed Mar 18 13:19:07 2009
@@ -32,6 +32,7 @@
import org.apache.hadoop.mapred.TaskTracker.TaskInProgress;
import org.apache.hadoop.util.ProcfsBasedProcessTree;
import org.apache.hadoop.util.ProcessTree;
+import org.apache.hadoop.util.StringUtils;
/**
* Manages memory usage of tasks running under this TT. Kills any task-trees
@@ -161,77 +162,90 @@
Map.Entry<TaskAttemptID, ProcessTreeInfo> entry = it.next();
TaskAttemptID tid = entry.getKey();
ProcessTreeInfo ptInfo = entry.getValue();
- String pId = ptInfo.getPID();
+ try {
+ String pId = ptInfo.getPID();
- // Initialize any uninitialized processTrees
- if (pId == null) {
- // get pid from pid-file
- pId = getPid(ptInfo.pidFile);
- if (pId != null) {
- // PID will be null, either if the pid file is yet to be created
- // or if the tip is finished and we removed pidFile, but the TIP
- // itself is still retained in runningTasks till successful
- // transmission to JT
-
- long sleeptimeBeforeSigkill = taskTracker.getJobConf().getLong(
- "mapred.tasktracker.sigkillthread.sleeptime-before-sigkill",
- ProcessTree.DEFAULT_SLEEPTIME_BEFORE_SIGKILL);
-
- // create process tree object
- ProcfsBasedProcessTree pt = new ProcfsBasedProcessTree(pId,
- ProcessTree.isSetsidAvailable, sleeptimeBeforeSigkill);
- LOG.debug("Tracking ProcessTree " + pId + " for the first time");
-
- ptInfo.setPid(pId);
- ptInfo.setProcessTree(pt);
- processTreeInfoMap.put(tid, ptInfo);
+ // Initialize any uninitialized processTrees
+ if (pId == null) {
+ // get pid from pid-file
+ pId = getPid(ptInfo.pidFile);
+ if (pId != null) {
+ // PID will be null, either if the pid file is yet to be created
+ // or if the tip is finished and we removed pidFile, but the TIP
+ // itself is still retained in runningTasks till successful
+ // transmission to JT
+
+ long sleeptimeBeforeSigkill =
+ taskTracker
+ .getJobConf()
+ .getLong(
+ "mapred.tasktracker.sigkillthread.sleeptime-before-sigkill",
+ ProcessTree.DEFAULT_SLEEPTIME_BEFORE_SIGKILL);
+
+ // create process tree object
+ ProcfsBasedProcessTree pt =
+ new ProcfsBasedProcessTree(pId,
+ ProcessTree.isSetsidAvailable, sleeptimeBeforeSigkill);
+ LOG.debug("Tracking ProcessTree " + pId + " for the first time");
+
+ ptInfo.setPid(pId);
+ ptInfo.setProcessTree(pt);
+ }
}
- }
- // End of initializing any uninitialized processTrees
+ // End of initializing any uninitialized processTrees
- if (pId == null) {
- continue; // processTree cannot be tracked
- }
+ if (pId == null) {
+ continue; // processTree cannot be tracked
+ }
- LOG.debug("Constructing ProcessTree for : PID = " + pId + " TID = "
- + tid);
- ProcfsBasedProcessTree pTree = ptInfo.getProcessTree();
- pTree = pTree.getProcessTree(); // get the updated process-tree
- ptInfo.setProcessTree(pTree); // update ptInfo with proces-tree of
- // updated state
- long currentMemUsage = pTree.getCumulativeVmem();
- long limit = ptInfo.getMemLimit();
- LOG.info("Memory usage of ProcessTree " + pId + " :" + currentMemUsage
- + "bytes. Limit : " + limit + "bytes");
-
- if (limit > taskTracker.getLimitMaxVMemPerTask()) {
- // TODO: With monitoring enabled and no scheduling based on
- // memory,users can seriously hijack the system by specifying memory
- // requirements well above the cluster wide limit. Ideally these jobs
- // should have been rejected by JT/scheduler. Because we can't do
- // that, in the minimum we should fail the tasks and hence the job.
- LOG.warn("Task " + tid
- + " 's maxVmemPerTask is greater than TT's limitMaxVmPerTask");
- }
+ LOG.debug("Constructing ProcessTree for : PID = " + pId + " TID = "
+ + tid);
+ ProcfsBasedProcessTree pTree = ptInfo.getProcessTree();
+ pTree = pTree.getProcessTree(); // get the updated process-tree
+ ptInfo.setProcessTree(pTree); // update ptInfo with proces-tree of
+ // updated state
+ long currentMemUsage = pTree.getCumulativeVmem();
+ long limit = ptInfo.getMemLimit();
+ LOG.info("Memory usage of ProcessTree " + pId + " :"
+ + currentMemUsage + "bytes. Limit : " + limit + "bytes");
+
+ if (limit > taskTracker.getLimitMaxVMemPerTask()) {
+ // TODO: With monitoring enabled and no scheduling based on
+ // memory,users can seriously hijack the system by specifying memory
+ // requirements well above the cluster wide limit. Ideally these
+ // jobs should have been rejected by JT/scheduler. Because we can't
+ // do that, in the minimum we should fail the tasks and hence the
+ // job.
+ LOG.warn("Task " + tid
+ + " 's maxVmemPerTask is greater than TT's limitMaxVmPerTask");
+ }
- if (limit != JobConf.DISABLED_MEMORY_LIMIT
- && currentMemUsage > limit) {
- // Task (the root process) is still alive and overflowing memory.
- // Clean up.
- String msg = "TaskTree [pid=" + pId + ",tipID=" + tid
- + "] is running beyond memory-limits. Current usage : "
- + currentMemUsage + "bytes. Limit : " + limit + "bytes. Killing task.";
- LOG.warn(msg);
- taskTracker.cleanUpOverMemoryTask(tid, true, msg);
-
- // Now destroy the ProcessTree, remove it from monitoring map.
- pTree.destroy(true/*in the background*/);
- it.remove();
- LOG.info("Removed ProcessTree with root " + pId);
- } else {
- // Accounting the total memory in usage for all tasks that are still
- // alive and within limits.
- memoryStillInUsage += currentMemUsage;
+ if (limit != JobConf.DISABLED_MEMORY_LIMIT
+ && currentMemUsage > limit) {
+ // Task (the root process) is still alive and overflowing memory.
+ // Clean up.
+ String msg =
+ "TaskTree [pid=" + pId + ",tipID=" + tid
+ + "] is running beyond memory-limits. Current usage : "
+ + currentMemUsage + "bytes. Limit : " + limit
+ + "bytes. Killing task.";
+ LOG.warn(msg);
+ taskTracker.cleanUpOverMemoryTask(tid, true, msg);
+
+ // Now destroy the ProcessTree, remove it from monitoring map.
+ pTree.destroy(true/*in the background*/);
+ it.remove();
+ LOG.info("Removed ProcessTree with root " + pId);
+ } else {
+ // Accounting the total memory in usage for all tasks that are still
+ // alive and within limits.
+ memoryStillInUsage += currentMemUsage;
+ }
+ } catch (Exception e) {
+ // Log the exception and proceed to the next task.
+ LOG.warn("Uncaught exception in TaskMemoryManager "
+ + "while managing memory of " + tid + " : "
+ + StringUtils.stringifyException(e));
}
}
Modified: hadoop/core/trunk/src/test/org/apache/hadoop/util/TestProcfsBasedProcessTree.java
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/org/apache/hadoop/util/TestProcfsBasedProcessTree.java?rev=755589&r1=755588&r2=755589&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/org/apache/hadoop/util/TestProcfsBasedProcessTree.java (original)
+++ hadoop/core/trunk/src/test/org/apache/hadoop/util/TestProcfsBasedProcessTree.java Wed Mar 18 13:19:07 2009
@@ -165,7 +165,7 @@
assertEquals(false, p.isAnyProcessInTreeAlive());
}
else {// process should be gone
- assertEquals(false, p.isAlive());
+ assertFalse("ProcessTree must have been gone", p.isAlive());
}
// Not able to join thread sometimes when forking with large N.
try {
@@ -174,5 +174,13 @@
} catch (InterruptedException ie) {
LOG.info("Interrupted while joining RogueTaskThread.");
}
+
+ // ProcessTree is gone now. Any further calls should be sane.
+ p = p.getProcessTree();
+ assertFalse("ProcessTree must have been gone", p.isAlive());
+ assertTrue("Cumulative vmem for the gone-process is "
+ + p.getCumulativeVmem() + " . It should be zero.", p
+ .getCumulativeVmem() == 0);
+ assertTrue(p.toString().equals("[ ]"));
}
}