Author: jlowe
Date: Wed Feb 12 16:51:21 2014
New Revision: 1567676

URL: http://svn.apache.org/r1567676
Log:
MAPREDUCE-5746. Job diagnostics can implicate wrong task for a failed job. Contributed by Jason Lowe
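
In brief: JobHistoryParser.handleTaskFailedEvent previously overwrote info.errorInfo for every TaskFailedEvent it read, so the job-level diagnostic named whichever task happened to fail last rather than the task whose failure actually caused the job to fail. With this change errorInfo is only set while it is still empty, so the first failed task is the one reported. A minimal sketch of how a caller would observe the difference, assuming a hypothetical .jhist file path passed on the command line (the class name and argument handling below are invented for illustration and are not part of this patch):

    // Hypothetical example only, not part of this patch.
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser;
    import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo;

    public class ErrorInfoDemo {
      public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // args[0] is assumed to be the path to a job history (.jhist) file
        JobHistoryParser parser = new JobHistoryParser(fs, new Path(args[0]));
        JobInfo info = parser.parse();
        // Before this fix the message could name the last task that failed;
        // after it, errorInfo names the first failed task.
        System.out.println(info.getErrorInfo());
      }
    }
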
Modified:
    hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt
    hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryParser.java
    hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java

Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt?rev=1567676&r1=1567675&r2=1567676&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt Wed Feb 12 16:51:21 2014
@@ -18,6 +18,9 @@ Release 0.23.11 - UNRELEASED
     MAPREDUCE-5454. TestDFSIO fails intermittently on JDK7 (Karthik Kambatla
     via jlowe)
 
+    MAPREDUCE-5746. Job diagnostics can implicate wrong task for a failed job
+    (jlowe)
+
 Release 0.23.10 - 2013-12-09
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryParser.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryParser.java?rev=1567676&r1=1567675&r2=1567676&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryParser.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryParser.java Wed Feb 12 16:51:21 2014
@@ -344,8 +344,10 @@ public class JobHistoryParser implements
     taskInfo.finishTime = event.getFinishTime();
     taskInfo.error = StringInterner.weakIntern(event.getError());
     taskInfo.failedDueToAttemptId = event.getFailedAttemptID();
-    info.errorInfo = "Task " + taskInfo.taskId +" failed " +
-      taskInfo.attemptsMap.size() + " times ";
+    if (info.errorInfo.isEmpty()) {
+      info.errorInfo = "Task " + taskInfo.taskId + " failed " +
+        taskInfo.attemptsMap.size() + " times ";
+    }
   }
 
   private void handleTaskStartedEvent(TaskStartedEvent event) {

Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java?rev=1567676&r1=1567675&r2=1567676&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java Wed Feb 12 16:51:21 2014
@@ -41,6 +41,7 @@ import org.apache.hadoop.fs.CommonConfig
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.JobID;
 import org.apache.hadoop.mapreduce.MRJobConfig;
 import org.apache.hadoop.mapreduce.TaskID;
 import org.apache.hadoop.mapreduce.TypeConverter;
@@ -52,7 +53,9 @@ import org.apache.hadoop.mapreduce.jobhi
 import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo;
 import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo;
 import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo;
+import org.apache.hadoop.mapreduce.jobhistory.TaskFailedEvent;
 import org.apache.hadoop.mapreduce.jobhistory.TaskFinishedEvent;
+import org.apache.hadoop.mapreduce.jobhistory.TaskStartedEvent;
 import org.apache.hadoop.mapreduce.v2.api.records.JobId;
 import org.apache.hadoop.mapreduce.v2.api.records.JobState;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
@@ -715,4 +718,40 @@ public class TestJobHistoryParsing {
 
     assertNull(test.getAMInfos());
   }
+
+  @Test
+  public void testMultipleFailedTasks() throws Exception {
+    JobHistoryParser parser =
+        new JobHistoryParser(Mockito.mock(FSDataInputStream.class));
+    EventReader reader = Mockito.mock(EventReader.class);
+    final AtomicInteger numEventsRead = new AtomicInteger(0); // Hack!
+    final org.apache.hadoop.mapreduce.TaskType taskType =
+        org.apache.hadoop.mapreduce.TaskType.MAP;
+    final TaskID[] tids = new TaskID[2];
+    JobID jid = new JobID("1", 1);
+    tids[0] = new TaskID(jid, taskType, 0);
+    tids[1] = new TaskID(jid, taskType, 1);
+    Mockito.when(reader.getNextEvent()).thenAnswer(
+        new Answer<HistoryEvent>() {
+          public HistoryEvent answer(InvocationOnMock invocation)
+              throws IOException {
+            // send two task start and two task fail events for tasks 0 and 1
+            int eventId = numEventsRead.getAndIncrement();
+            TaskID tid = tids[eventId & 0x1];
+            if (eventId < 2) {
+              return new TaskStartedEvent(tid, 0, taskType, "");
+            }
+            if (eventId < 4) {
+              TaskFailedEvent tfe = new TaskFailedEvent(tid, 0, taskType,
+                  "failed", "FAILED", null);
+              tfe.setDatum(tfe.getDatum());
+              return tfe;
+            }
+            return null;
+          }
+        });
+    JobInfo info = parser.parse(reader);
+    assertTrue("Task 0 not implicated",
+        info.getErrorInfo().contains(tids[0].toString()));
+  }
 }