Author: ddas
Date: Fri Feb 20 13:12:14 2009
New Revision: 746227
URL: http://svn.apache.org/viewvc?rev=746227&view=rev
Log:
HADOOP-5233. Addresses the three issues - Race condition in updating status,
NPE in TaskTracker task localization when the conf file is missing
(HADOOP-5234) and NPE in handling KillTaskAction of a cleanup task
(HADOOP-5235). Contributed by Amareshwari Sriramadasu.
Modified:
hadoop/core/trunk/CHANGES.txt
hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/JobInProgress.java
hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/Task.java
hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskInProgress.java
hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskStatus.java
hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskTracker.java
Modified: hadoop/core/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=746227&r1=746226&r2=746227&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Fri Feb 20 13:12:14 2009
@@ -808,6 +808,11 @@
Scheduler accesses the tasktrackers stored by the JobTracker.
(Rahul Kumar Singh via yhemanth)
+ HADOOP-5233. Addresses the three issues - Race condition in updating
+ status, NPE in TaskTracker task localization when the conf file is missing
+ (HADOOP-5234) and NPE in handling KillTaskAction of a cleanup task (HADOOP-5235).
+ (Amareshwari Sriramadasu via ddas)
+
Release 0.19.1 - Unreleased
IMPROVEMENTS
Modified:
hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/JobInProgress.java
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/JobInProgress.java?rev=746227&r1=746226&r2=746227&view=diff
==============================================================================
--- hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/JobInProgress.java
(original)
+++ hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/JobInProgress.java
Fri Feb 20 13:12:14 2009
@@ -815,6 +815,8 @@
} else {
reduceCleanupTasks.add(taskid);
}
+ // Remove the task entry from jobtracker
+ jobtracker.removeTaskEntry(taskid);
}
//For a failed task update the JT datastructures.
else if (state == TaskStatus.State.FAILED ||
Modified: hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/Task.java
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/Task.java?rev=746227&r1=746226&r2=746227&view=diff
==============================================================================
--- hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/Task.java (original)
+++ hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/Task.java Fri Feb 20
13:12:14 2009
@@ -514,8 +514,7 @@
if (sendProgress) {
// we need to send progress update
updateCounters();
- taskStatus.statusUpdate(getState(),
- taskProgress.get(),
+ taskStatus.statusUpdate(taskProgress.get(),
taskProgress.toString(),
counters);
taskFound = umbilical.statusUpdate(taskId, taskStatus);
@@ -702,8 +701,7 @@
private void sendLastUpdate(TaskUmbilicalProtocol umbilical)
throws IOException {
// send a final status report
- taskStatus.statusUpdate(getState(),
- taskProgress.get(),
+ taskStatus.statusUpdate(taskProgress.get(),
taskProgress.toString(),
counters);
statusUpdate(umbilical);
Modified:
hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskInProgress.java
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskInProgress.java?rev=746227&r1=746226&r2=746227&view=diff
==============================================================================
--- hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskInProgress.java
(original)
+++ hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskInProgress.java
Fri Feb 20 13:12:14 2009
@@ -915,7 +915,6 @@
t.setTaskCleanupTask();
t.setState(taskStatuses.get(taskid).getRunState());
cleanupTasks.put(taskid, taskTracker);
- jobtracker.removeTaskEntry(taskid);
}
t.setConf(conf);
LOG.debug("Launching task with skipRanges:"+failedRanges.getSkipRanges());
Modified: hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskStatus.java
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskStatus.java?rev=746227&r1=746226&r2=746227&view=diff
==============================================================================
--- hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskStatus.java
(original)
+++ hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskStatus.java Fri
Feb 20 13:12:14 2009
@@ -54,7 +54,7 @@
private long finishTime;
private long outputSize;
- private Phase phase = Phase.STARTING;
+ private volatile Phase phase = Phase.STARTING;
private Counters counters;
private boolean includeCounters;
private SortedRanges.Range nextRecordRange = new SortedRanges.Range();
@@ -267,16 +267,15 @@
/**
* Update the status of the task.
*
- * @param runstate
+ * This update is done by ping thread before sending the status.
+ *
* @param progress
* @param state
* @param counters
*/
- synchronized void statusUpdate(State runState,
- float progress,
+ synchronized void statusUpdate(float progress,
String state,
Counters counters) {
- setRunState(runState);
setProgress(progress);
setStateString(state);
setCounters(counters);
Modified: hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskTracker.java
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskTracker.java?rev=746227&r1=746226&r2=746227&view=diff
==============================================================================
--- hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskTracker.java
(original)
+++ hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/TaskTracker.java Fri
Feb 20 13:12:14 2009
@@ -2050,10 +2050,16 @@
if (this.done ||
(this.taskStatus.getRunState() != TaskStatus.State.RUNNING &&
this.taskStatus.getRunState() != TaskStatus.State.COMMIT_PENDING &&
- !isCleaningup())) {
+ !isCleaningup()) ||
+ ((this.taskStatus.getRunState() == TaskStatus.State.COMMIT_PENDING ||
+ this.taskStatus.getRunState() == TaskStatus.State.FAILED_UNCLEAN ||
+ this.taskStatus.getRunState() == TaskStatus.State.KILLED_UNCLEAN) &&
+ taskStatus.getRunState() == TaskStatus.State.RUNNING)) {
//make sure we ignore progress messages after a task has
//invoked TaskUmbilicalProtocol.done() or if the task has been
- //KILLED/FAILED
+ //KILLED/FAILED/FAILED_UNCLEAN/KILLED_UNCLEAN
+ //Also ignore progress update if the state change is from
+ //COMMIT_PENDING/FAILED_UNCLEAN/KILLED_UNCLEAN to RUNNING
LOG.info(task.getTaskID() + " Ignoring status-update since " +
((this.done) ? "task is 'done'" :
("runState: " + this.taskStatus.getRunState()))
@@ -2407,7 +2413,10 @@
if (wasFailure) {
failures += 1;
}
- runner.kill();
+ // runner could be null if task-cleanup attempt is not localized yet
+ if (runner != null) {
+ runner.kill();
+ }
setTaskFailState(wasFailure);
} else if (taskStatus.getRunState() == TaskStatus.State.UNASSIGNED) {
if (wasFailure) {
@@ -2486,6 +2495,11 @@
}
synchronized (this) {
try {
+ // localJobConf could be null if localization has not happened
+ // then no cleanup will be required.
+ if (localJobConf == null) {
+ return;
+ }
String taskDir = getLocalTaskDir(task.getJobID().toString(),
taskId.toString(), task.isTaskCleanupTask());
if (needCleanup) {
@@ -2622,7 +2636,8 @@
public synchronized void commitPending(TaskAttemptID taskid,
TaskStatus taskStatus)
throws IOException {
- LOG.info("Task " + taskid + " is in COMMIT_PENDING");
+ LOG.info("Task " + taskid + " is in commit-pending," +"" +
+ " task state:" +taskStatus.getRunState());
statusUpdate(taskid, taskStatus);
reportTaskFinished(taskid, true);
}