Author: bobby Date: Fri Mar 8 16:03:33 2013 New Revision: 1454440 URL: http://svn.apache.org/r1454440 Log: svn merge -c 1452372 FIXES: MAPREDUCE-5043 Fetch failure processing can cause AM event queue to backup and eventually OOM Contributed by Jason Lowe
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/TaskAttempt.java hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MockJobs.java hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRuntimeEstimators.java hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedTaskAttempt.java Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt?rev=1454440&r1=1454439&r2=1454440&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt (original) +++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt Fri Mar 8 16:03:33 2013 @@ -88,6 +88,9 @@ Release 0.23.7 - UNRELEASED appropriately used and that on-disk segments are correctly sorted on file-size. (Anty Rao and Ravi Prakash via acmurthy) + MAPREDUCE-5043. Fetch failure processing can cause AM event queue to + backup and eventually OOM (Jason Lowe via bobby) + MAPREDUCE-5023. History Server Web Services missing Job Counters (Ravi Prakash via tgraves) Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/TaskAttempt.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/TaskAttempt.java?rev=1454440&r1=1454439&r2=1454440&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/TaskAttempt.java (original) +++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/TaskAttempt.java Fri Mar 8 16:03:33 2013 @@ -21,6 +21,7 @@ package org.apache.hadoop.mapreduce.v2.a import java.util.List; import org.apache.hadoop.mapreduce.Counters; +import org.apache.hadoop.mapreduce.v2.api.records.Phase; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptReport; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState; @@ -36,6 +37,7 @@ public interface TaskAttempt { List<String> getDiagnostics(); Counters getCounters(); float getProgress(); + Phase getPhase(); TaskAttemptState getState(); /** Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java?rev=1454440&r1=1454439&r2=1454440&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java (original) +++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java Fri Mar 8 16:03:33 2013 @@ -1543,6 +1543,20 @@ public class JobImpl implements org.apac SingleArcTransition<JobImpl, JobEvent> { @Override public void transition(JobImpl job, JobEvent event) { + //get number of shuffling reduces + int shufflingReduceTasks = 0; + for (TaskId taskId : job.reduceTasks) { + Task task = job.tasks.get(taskId); + if (TaskState.RUNNING.equals(task.getState())) { + for(TaskAttempt attempt : task.getAttempts().values()) { + if(attempt.getPhase() == Phase.SHUFFLE) { + shufflingReduceTasks++; + break; + } + } + } + } + JobTaskAttemptFetchFailureEvent fetchfailureEvent = (JobTaskAttemptFetchFailureEvent) event; for (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId mapId : @@ -1551,20 +1565,6 @@ public class JobImpl implements org.apac fetchFailures = (fetchFailures == null) ? 1 : (fetchFailures+1); job.fetchFailuresMapping.put(mapId, fetchFailures); - //get number of shuffling reduces - int shufflingReduceTasks = 0; - for (TaskId taskId : job.reduceTasks) { - Task task = job.tasks.get(taskId); - if (TaskState.RUNNING.equals(task.getState())) { - for(TaskAttempt attempt : task.getAttempts().values()) { - if(attempt.getReport().getPhase() == Phase.SHUFFLE) { - shufflingReduceTasks++; - break; - } - } - } - } - float failureRate = shufflingReduceTasks == 0 ? 1.0f : (float) fetchFailures / shufflingReduceTasks; // declare faulty if fetch-failures >= max-allowed-failures Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java?rev=1454440&r1=1454439&r2=1454440&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java (original) +++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java Fri Mar 8 16:03:33 2013 @@ -948,6 +948,16 @@ public abstract class TaskAttemptImpl im } @Override + public Phase getPhase() { + readLock.lock(); + try { + return reportedStatus.phase; + } finally { + readLock.unlock(); + } + } + + @Override public TaskAttemptState getState() { readLock.lock(); try { Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MockJobs.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MockJobs.java?rev=1454440&r1=1454439&r2=1454440&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MockJobs.java (original) +++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MockJobs.java Fri Mar 8 16:03:33 2013 @@ -271,6 +271,11 @@ public class MockJobs extends MockApps { } @Override + public Phase getPhase() { + return report.getPhase(); + } + + @Override public TaskAttemptState getState() { return report.getTaskAttemptState(); } Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRuntimeEstimators.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRuntimeEstimators.java?rev=1454440&r1=1454439&r2=1454440&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRuntimeEstimators.java (original) +++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRuntimeEstimators.java Fri Mar 8 16:03:33 2013 @@ -39,6 +39,7 @@ import org.apache.hadoop.mapreduce.v2.ap import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.api.records.JobReport; import org.apache.hadoop.mapreduce.v2.api.records.JobState; +import org.apache.hadoop.mapreduce.v2.api.records.Phase; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEvent; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptReport; @@ -633,6 +634,11 @@ public class TestRuntimeEstimators { } @Override + public Phase getPhase() { + throw new UnsupportedOperationException("Not supported yet."); + } + + @Override public TaskAttemptState getState() { if (overridingState != null) { return overridingState; Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedTaskAttempt.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedTaskAttempt.java?rev=1454440&r1=1454439&r2=1454440&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedTaskAttempt.java (original) +++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedTaskAttempt.java Fri Mar 8 16:03:33 2013 @@ -24,6 +24,7 @@ import java.util.List; import org.apache.hadoop.mapreduce.Counters; import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo; +import org.apache.hadoop.mapreduce.v2.api.records.Phase; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptReport; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState; @@ -101,6 +102,11 @@ public class CompletedTaskAttempt implem } @Override + public Phase getPhase() { + return Phase.CLEANUP; + } + + @Override public TaskAttemptState getState() { return state; }