Author: cdouglas
Date: Tue May 10 20:18:44 2011
New Revision: 1101629
URL: http://svn.apache.org/viewvc?rev=1101629&view=rev
Log:
MAPREDUCE-2456. Log the reduce taskID and associated TaskTrackers with
failed fetch notifications in the JobTracker log.
Contributed by Jeffrey Naisbitt
Modified:
hadoop/common/branches/branch-0.20-security/CHANGES.txt
hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobInProgress.java
hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobTracker.java
Modified: hadoop/common/branches/branch-0.20-security/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/CHANGES.txt?rev=1101629&r1=1101628&r2=1101629&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.20-security/CHANGES.txt Tue May 10 20:18:44 2011
@@ -9,6 +9,10 @@ Release 0.20.205.0 - unreleased
MAPREDUCE-2451. Log the details from health check script at the
JobTracker. (Thomas Graves via cdouglas)
+ MAPREDUCE-2456. Log the reduce taskID and associated TaskTrackers with
+ failed fetch notifications in the JobTracker log.
+ (Jeffrey Naisbitt via cdouglas)
+
Release 0.20.204.0 - unreleased
BUG FIXES
Modified:
hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobInProgress.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobInProgress.java?rev=1101629&r1=1101628&r2=1101629&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobInProgress.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobInProgress.java Tue May 10 20:18:44 2011
@@ -3249,13 +3249,17 @@ public class JobInProgress {
synchronized void fetchFailureNotification(TaskInProgress tip,
TaskAttemptID mapTaskId,
- String trackerName) {
+ String mapTrackerName,
+ TaskAttemptID reduceTaskId,
+ String reduceTrackerName) {
Integer fetchFailures = mapTaskIdToFetchFailuresMap.get(mapTaskId);
fetchFailures = (fetchFailures == null) ? 1 : (fetchFailures+1);
mapTaskIdToFetchFailuresMap.put(mapTaskId, fetchFailures);
- LOG.info("Failed fetch notification #" + fetchFailures + " for task " +
- mapTaskId);
-
+ LOG.info("Failed fetch notification #" + fetchFailures + " for map task: "
+ + mapTaskId + " running on tracker: " + mapTrackerName
+ + " and reduce task: " + reduceTaskId + " running on tracker: "
+ + reduceTrackerName);
+
float failureRate = (float)fetchFailures / runningReduceTasks;
// declare faulty if fetch-failures >= max-allowed-failures
boolean isMapFaulty = failureRate >= MAX_ALLOWED_FETCH_FAILURES_PERCENT;
@@ -3267,7 +3271,7 @@ public class JobInProgress {
failedTask(tip, mapTaskId, "Too many fetch-failures",
(tip.isMapTask() ? TaskStatus.Phase.MAP :
TaskStatus.Phase.REDUCE),
- TaskStatus.State.FAILED, trackerName);
+ TaskStatus.State.FAILED, mapTrackerName);
mapTaskIdToFetchFailuresMap.remove(mapTaskId);
}
Modified:
hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobTracker.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobTracker.java?rev=1101629&r1=1101628&r2=1101629&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobTracker.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobTracker.java Tue May 10 20:18:44 2011
@@ -4750,9 +4750,11 @@ public class JobTracker implements MRCon
if (failedFetchTrackerName == null) {
failedFetchTrackerName = "Lost task tracker";
}
- failedFetchMap.getJob().fetchFailureNotification(failedFetchMap,
- mapTaskId,
- failedFetchTrackerName);
+ failedFetchMap.getJob().fetchFailureNotification(failedFetchMap,
+ mapTaskId,
+ failedFetchTrackerName,
+ taskId,
+ trackerName);
}
}
}