Author: mc
Date: Sun Jun 26 11:03:17 2005
New Revision: 201885

URL: http://svn.apache.org/viewcvs?rev=201885&view=rev
Log:

  Abort the job once any single task has failed too many times
  (MAX_TASK_FAILURES, currently 3). This prevents runaway processes.


Modified:
    
lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java

Modified: 
lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java?rev=201885&r1=201884&r2=201885&view=diff
==============================================================================
--- 
lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java 
(original)
+++ 
lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java 
Sun Jun 26 11:03:17 2005
@@ -33,6 +33,7 @@
  *******************************************************/
 public class JobTracker implements MRConstants, InterTrackerProtocol, JobSubmissionProtocol {
     static final int TRACKERINFO_PORT = 7845;
+    static final int MAX_TASK_FAILURES = 3;
 
     public static final Logger LOG = LogFormatter.getLogger("org.apache.nutch.mapred.JobTracker");
     public static JobTracker tracker = null;
@@ -475,6 +476,7 @@
         TreeMap completeMapTasks = new TreeMap();
         TreeMap incompleteReduceTasks = new TreeMap();
         TreeMap completeReduceTasks = new TreeMap();
+        TreeMap taskFailures = new TreeMap();
 
         // Info for user; useless for JobTracker
         int numMapTasks = 0;
@@ -665,6 +667,15 @@
                 completeReduceTasks.remove(taskid);
                 incompleteReduceTasks.put(taskid, t);
             }
+            
+            // Check if we need to kill the job because of excess failures
+            Integer failures = (Integer) taskFailures.get(taskid);
+            int numFailures = ((failures == null) ? 0 : failures.intValue()) + 1;
+            taskFailures.put(taskid, new Integer(numFailures));
+            if (numFailures >= MAX_TASK_FAILURES) {
+                kill();
+            }
+
             if (status.getRunState() == JobStatus.RUNNING) {
                 executeTask(taskid);
             }


Reply via email to