Author: cutting
Date: Fri Dec 15 14:11:46 2006
New Revision: 487691

URL: http://svn.apache.org/viewvc?view=rev&rev=487691
Log:
HADOOP-791. Fix a deadlock in the task tracker. Contributed by Mahadev.

Modified:
lucene/hadoop/trunk/CHANGES.txt
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=487691&r1=487690&r2=487691
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Fri Dec 15 14:11:46 2006
@@ -110,6 +110,9 @@
 2. HADOOP-827. Turn off speculative execution by default, since it's currently broken. (omalley via cutting)
+ 3. HADOOP-791. Fix a deadlock in the task tracker.
+ (Mahadev Konar via cutting)
+
 Release 0.9.1 - 2006-12-06

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java?view=diff&rev=487691&r1=487690&r2=487691
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java Fri Dec 15 14:11:46 2006
@@ -1130,28 +1130,33 @@
 * We no longer need anything from this task, as the job has
 * finished. If the task is still running, kill it (and clean up
 */
- public synchronized void jobHasFinished() throws IOException {
-
- if (getRunState() == TaskStatus.State.RUNNING) {
+ public void jobHasFinished() throws IOException {
+ boolean killTask = false;
+ synchronized(this){
+ killTask = (getRunState() == TaskStatus.State.RUNNING);
+ if (killTask) {
 killAndCleanup(false);
- } else {
- cleanup();
- }
- if (keepJobFiles)
- return;
-
- // Delete temp directory in case any task used PhasedFileSystem.
- try{
- String systemDir = task.getConf().get("mapred.system.dir");
- Path taskTempDir = new Path(systemDir + "/" +
- task.getJobId() + "/" + task.getTipId());
- if( fs.exists(taskTempDir)){
- fs.delete(taskTempDir) ;
 }
- }catch(IOException e){
- LOG.warn("Error in deleting reduce temporary output",e);
+ }
+ if (!killTask) {
+ cleanup();
+ }
+ if (keepJobFiles)
+ return;
+
+ synchronized(this){
+ // Delete temp directory in case any task used PhasedFileSystem.
+ try{
+ String systemDir = task.getConf().get("mapred.system.dir");
+ Path taskTempDir = new Path(systemDir + "/" +
+ task.getJobId() + "/" + task.getTipId() + "/" + task.getTaskId());
+ if( fs.exists(taskTempDir)){
+ fs.delete(taskTempDir) ;
+ }
+ }catch(IOException e){
+ LOG.warn("Error in deleting reduce temporary output",e);
+ }
 }
-
 // Delete the job directory for this
 // task if the job is done/failed
 if (purgeJobFiles) {
@@ -1205,6 +1210,9 @@
 * We no longer need anything from this task. Either the
 * controlling job is all done and the files have been copied
 * away, or the task failed and we don't need the remains.
+ * Any calls to cleanup should not lock the tip first.
+ * cleanup does the right thing- updates tasks in Tasktracker
+ * by locking tasktracker first and then locks the tip.
 */
 void cleanup() throws IOException {
 String taskId = task.getTaskId();