Author: acmurthy Date: Thu Oct 11 13:27:47 2007 New Revision: 583945 URL: http://svn.apache.org/viewvc?rev=583945&view=rev Log: HADOOP-2031. Correctly maintain the taskid which takes the TIP to completion, failing which the case of lost tasktrackers isn't handled properly, i.e., the map TIP is incorrectly left marked as 'complete' and is never rescheduled elsewhere, leading to hung reduces. Contributed by Devaraj Das.
Modified: lucene/hadoop/trunk/CHANGES.txt lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskInProgress.java Modified: lucene/hadoop/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=583945&r1=583944&r2=583945&view=diff ============================================================================== --- lucene/hadoop/trunk/CHANGES.txt (original) +++ lucene/hadoop/trunk/CHANGES.txt Thu Oct 11 13:27:47 2007 @@ -271,6 +271,12 @@ information via taskdetails.jsp. This bug was introduced by HADOOP-1874. (Amar Kamat via acmurthy) + HADOOP-2031. Correctly maintain the taskid which takes the TIP to + completion, failing which the case of lost tasktrackers isn't handled + properly i.e. the map TIP is incorrectly left marked as 'complete' and it + is never rescheduled elsewhere, leading to hung reduces. + (Devaraj Das via acmurthy) + IMPROVEMENTS HADOOP-1908. Restructure data node code so that block sending and Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskInProgress.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskInProgress.java?rev=583945&r1=583944&r2=583945&view=diff ============================================================================== --- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskInProgress.java (original) +++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskInProgress.java Thu Oct 11 13:27:47 2007 @@ -89,6 +89,9 @@ // The 'next' usable taskid of this tip int nextTaskId = 0; + // The taskid that took this TIP to SUCCESS + private String successfulTaskId; + // Map from task Id -> TaskTracker Id, contains tasks that are // currently runnings private TreeMap<String, String> activeTasks = new TreeMap<String, String>(); @@ -243,6 +246,18 @@ return !activeTasks.isEmpty(); } + private String getSuccessfulTaskid() { + return successfulTaskId; + } + + private void setSuccessfulTaskid(String successfulTaskId) { + this.successfulTaskId = 
successfulTaskId; + } + + private void resetSuccessfulTaskid() { + this.successfulTaskId = ""; + } + /** * Is this tip complete? * @@ -253,18 +268,14 @@ } /** - * Is the given taskid in this tip complete? + * Is the given taskid the one that took this tip to completion? * * @param taskid taskid of attempt to check for completion * @return <code>true</code> if taskid is complete, else <code>false</code> */ public boolean isComplete(String taskid) { - TaskStatus status = taskStatuses.get(taskid); - if (status == null) { - return false; - } return ((completes > 0) && - (status.getRunState() == TaskStatus.State.SUCCEEDED)); + getSuccessfulTaskid().equals(taskid)); } /** @@ -473,6 +484,9 @@ if (this.isMapTask() && isComplete(taskid) && jobStatus.getRunState() != JobStatus.SUCCEEDED) { this.completes--; + + // Reset the successfulTaskId since we don't have a SUCCESSFUL task now + resetSuccessfulTaskid(); } @@ -529,6 +543,9 @@ // completedTask(taskid, TaskStatus.State.SUCCEEDED); + // Note the successful taskid + setSuccessfulTaskid(taskid); + // // Now that the TIP is complete, the other speculative // subtasks will be closed when the owning tasktracker