Author: jeagles
Date: Tue May 13 18:28:07 2014
New Revision: 1594318

URL: http://svn.apache.org/r1594318
Log:
MAPREDUCE-5888. Failed job leaves hung AM after it unregisters (Jason Lowe via jeagles)
Modified:
    hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt
    hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java

Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt?rev=1594318&r1=1594317&r2=1594318&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt Tue May 13 18:28:07 2014
@@ -80,6 +80,9 @@ Release 2.5.0 - UNRELEASED
     MAPREDUCE-5884. History server uses short user name when canceling tokens
     (Mohammad Kamrul Islam via jlowe)
 
+    MAPREDUCE-5888. Failed job leaves hung AM after it unregisters (Jason Lowe
+    via jeagles)
+
 Release 2.4.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java?rev=1594318&r1=1594317&r2=1594318&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java Tue May 13 18:28:07 2014
@@ -32,6 +32,7 @@ import java.util.Map.Entry;
 import java.util.Set;
 import java.util.concurrent.ScheduledFuture;
 import java.util.concurrent.ScheduledThreadPoolExecutor;
+import java.util.concurrent.ThreadFactory;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReadWriteLock;
@@ -129,6 +130,8 @@ import org.apache.hadoop.yarn.state.Stat
 import org.apache.hadoop.yarn.state.StateMachineFactory;
 import org.apache.hadoop.yarn.util.Clock;
 
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+
 /** Implementation of Job interface. Maintains the state machines of Job.
  * The read and write calls use ReadWriteLock for concurrency.
  */
@@ -644,8 +647,8 @@ public class JobImpl implements org.apac
 
   private JobStateInternal forcedState = null;
 
-  //Executor used for running future tasks. Setting thread pool size to 1
-  private ScheduledThreadPoolExecutor executor = new ScheduledThreadPoolExecutor(1);
+  //Executor used for running future tasks.
+  private ScheduledThreadPoolExecutor executor;
   private ScheduledFuture failWaitTriggerScheduledFuture;
 
   private JobState lastNonFinalState = JobState.NEW;
@@ -687,6 +690,13 @@ public class JobImpl implements org.apac
     this.aclsManager = new JobACLsManager(conf);
     this.username = System.getProperty("user.name");
     this.jobACLs = aclsManager.constructJobACLs(conf);
+
+    ThreadFactory threadFactory = new ThreadFactoryBuilder()
+      .setNameFormat("Job Fail Wait Timeout Monitor #%d")
+      .setDaemon(true)
+      .build();
+    this.executor = new ScheduledThreadPoolExecutor(1, threadFactory);
+
     // This "this leak" is okay because the retained pointer is in an
     // instance variable.
     stateMachine = stateMachineFactory.make(this);