Author: omalley
Date: Tue Jul 10 16:30:41 2007
New Revision: 555114

URL: http://svn.apache.org/viewvc?view=rev&rev=555114
Log:
HADOOP-1554.  Log killed tasks in the JobHistory. Contributed by Devaraj.


Modified:
    lucene/hadoop/trunk/CHANGES.txt
    
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/DefaultJobHistoryParser.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobHistory.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobInProgress.java
    lucene/hadoop/trunk/src/webapps/job/jobdetailshistory.jsp
    lucene/hadoop/trunk/src/webapps/job/jobtaskshistory.jsp

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=555114&r1=555113&r2=555114
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Tue Jul 10 16:30:41 2007
@@ -283,6 +283,8 @@
  87. HADOOP-1571.  Add contrib lib directories to root build.xml
      javadoc classpath.  (Michael Stack via tomwhite)
 
+ 88. HADOOP-1554.  Log killed tasks to the job history and display them on the
+     web/ui. (Devaraj Das via omalley)
 
 Release 0.13.0 - 2007-06-08
 

Modified: 
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/DefaultJobHistoryParser.java
URL: 
http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/DefaultJobHistoryParser.java?view=diff&rev=555114&r1=555113&r2=555114
==============================================================================
--- 
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/DefaultJobHistoryParser.java
 (original)
+++ 
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/DefaultJobHistoryParser.java
 Tue Jul 10 16:30:41 2007
@@ -169,7 +169,7 @@
   
   
   // call this only for jobs that succeeded for better results. 
-  static class BadNodesFilter implements JobHistory.Listener {
+  static class FailedOnNodesFilter implements JobHistory.Listener {
     private Map<String, Set<String>> badNodesToNumFailedTasks =
       new HashMap<String, Set<String>>();
     
@@ -183,6 +183,34 @@
           recType.equals(JobHistory.RecordTypes.ReduceAttempt)) {
         
         if (Values.FAILED.name().equals(values.get(Keys.TASK_STATUS)) ){
+          String hostName = values.get(Keys.HOSTNAME);
+          String taskid = values.get(Keys.TASKID); 
+          Set<String> tasks = badNodesToNumFailedTasks.get(hostName); 
+          if (null == tasks ){
+            tasks = new TreeSet<String>(); 
+            tasks.add(taskid);
+            badNodesToNumFailedTasks.put(hostName, tasks);
+          }else{
+            tasks.add(taskid);
+          }
+        }
+      }      
+    }
+  }
+  static class KilledOnNodesFilter implements JobHistory.Listener {
+    private Map<String, Set<String>> badNodesToNumFailedTasks =
+      new HashMap<String, Set<String>>();
+    
+    Map<String, Set<String>> getValues(){
+      return badNodesToNumFailedTasks; 
+    }
+    public void handle(JobHistory.RecordTypes recType, Map<Keys, String> 
values)
+      throws IOException {
+      
+      if (recType.equals(JobHistory.RecordTypes.MapAttempt) || 
+          recType.equals(JobHistory.RecordTypes.ReduceAttempt)) {
+        
+        if (Values.KILLED.name().equals(values.get(Keys.TASK_STATUS)) ){
           String hostName = values.get(Keys.HOSTNAME);
           String taskid = values.get(Keys.TASKID); 
           Set<String> tasks = badNodesToNumFailedTasks.get(hostName); 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobHistory.java
URL: 
http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobHistory.java?view=diff&rev=555114&r1=555113&r2=555114
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobHistory.java 
(original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobHistory.java Tue 
Jul 10 16:30:41 2007
@@ -564,6 +564,28 @@
                                        String.valueOf(timestamp), hostName, 
error}); 
         }
       }
+    }
+    /**
+     * Log task attempt killed event.
+     * @param jobId jobid
+     * @param taskId taskid
+     * @param taskAttemptId task attempt id
+     * @param timestamp timestamp
+     * @param hostName hostname of this task attempt.
+     * @param error error message if any for this task attempt. 
+     */
+    public static void logKilled(String jobId, String taskId, String 
taskAttemptId, 
+                                 long timestamp, String hostName, String 
error){
+      if (!disableHistory){
+        PrintWriter writer = (PrintWriter)openJobs.get(JOBTRACKER_START_TIME + 
"_" + jobId);
+        if (null != writer){
+          JobHistory.log(writer, RecordTypes.MapAttempt, 
+                         new Enum[]{Keys.TASK_TYPE, Keys.TASKID, 
Keys.TASK_ATTEMPT_ID, Keys.TASK_STATUS, 
+                                    Keys.FINISH_TIME, Keys.HOSTNAME, 
Keys.ERROR},
+                         new String[]{ Values.MAP.name(), taskId, 
taskAttemptId, Values.KILLED.name(),
+                                       String.valueOf(timestamp), hostName, 
error}); 
+        }
+      }
     } 
   }
   /**
@@ -638,6 +660,29 @@
         }
       }
     }
+    /**
+     * Log killed reduce task attempt. 
+     * @param jobId job id 
+     * @param taskId task id
+     * @param taskAttemptId task attempt id
+     * @param timestamp time stamp when task was killed
+     * @param hostName host name of the task attempt.  
+     * @param error error message of the task. 
+     */
+    public static void logKilled(String jobId, String taskId, String 
taskAttemptId, long timestamp, 
+                                 String hostName, String error){
+      if (!disableHistory){
+        PrintWriter writer = (PrintWriter)openJobs.get(JOBTRACKER_START_TIME + 
"_" + jobId);
+        if (null != writer){
+          JobHistory.log(writer, RecordTypes.ReduceAttempt, 
+                         new Enum[]{  Keys.TASK_TYPE, Keys.TASKID, 
Keys.TASK_ATTEMPT_ID, Keys.TASK_STATUS, 
+                                      Keys.FINISH_TIME, Keys.HOSTNAME, 
Keys.ERROR },
+                         new String[]{ Values.REDUCE.name(), taskId, 
taskAttemptId, Values.KILLED.name(), 
+                                       String.valueOf(timestamp), hostName, 
error }); 
+        }
+      }
+    }
+
   }
   /**
    * Callback interface for reading back log events from JobHistory. This 
interface 

Modified: 
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobInProgress.java
URL: 
http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobInProgress.java?view=diff&rev=555114&r1=555113&r2=555114
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobInProgress.java 
(original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobInProgress.java 
Tue Jul 10 16:30:41 2007
@@ -929,18 +929,30 @@
     String taskTrackerName = status.getTaskTracker();
     if (status.getIsMap()) {
       JobHistory.MapAttempt.logStarted(profile.getJobId(), 
-                                       tip.getTIPId(), status.getTaskId(), 
status.getStartTime(), 
-                                       taskTrackerName); 
-      JobHistory.MapAttempt.logFailed(profile.getJobId(), 
-                                      tip.getTIPId(), status.getTaskId(), 
System.currentTimeMillis(),
-                                      taskTrackerName, 
status.getDiagnosticInfo()); 
+                tip.getTIPId(), status.getTaskId(), status.getStartTime(), 
+                taskTrackerName);
+      if (status.getRunState() == TaskStatus.State.FAILED) {
+        JobHistory.MapAttempt.logFailed(profile.getJobId(), 
+                tip.getTIPId(), status.getTaskId(), System.currentTimeMillis(),
+                taskTrackerName, status.getDiagnosticInfo());
+      } else {
+        JobHistory.MapAttempt.logKilled(profile.getJobId(), 
+                tip.getTIPId(), status.getTaskId(), System.currentTimeMillis(),
+                taskTrackerName, status.getDiagnosticInfo());
+      }
     } else {
       JobHistory.ReduceAttempt.logStarted(profile.getJobId(), 
-                                          tip.getTIPId(), status.getTaskId(), 
status.getStartTime(), 
-                                          taskTrackerName); 
-      JobHistory.ReduceAttempt.logFailed(profile.getJobId(), 
-                                         tip.getTIPId(), status.getTaskId(), 
System.currentTimeMillis(),
-                                         taskTrackerName, 
status.getDiagnosticInfo()); 
+                tip.getTIPId(), status.getTaskId(), status.getStartTime(), 
+                taskTrackerName);
+      if (status.getRunState() == TaskStatus.State.FAILED) {
+        JobHistory.ReduceAttempt.logFailed(profile.getJobId(), 
+                tip.getTIPId(), status.getTaskId(), System.currentTimeMillis(),
+                taskTrackerName, status.getDiagnosticInfo());
+      } else {
+        JobHistory.ReduceAttempt.logKilled(profile.getJobId(), 
+                tip.getTIPId(), status.getTaskId(), System.currentTimeMillis(),
+                taskTrackerName, status.getDiagnosticInfo());
+      }
     }
         
     // After this, try to assign tasks with the one after this, so that

Modified: lucene/hadoop/trunk/src/webapps/job/jobdetailshistory.jsp
URL: 
http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/webapps/job/jobdetailshistory.jsp?view=diff&rev=555114&r1=555113&r2=555114
==============================================================================
--- lucene/hadoop/trunk/src/webapps/job/jobdetailshistory.jsp (original)
+++ lucene/hadoop/trunk/src/webapps/job/jobdetailshistory.jsp Tue Jul 10 
16:30:41 2007
@@ -29,26 +29,29 @@
 <b>Launched At : </b> <%=StringUtils.getFormattedTimeWithDiff(dateFormat, 
job.getLong(Keys.LAUNCH_TIME), job.getLong(Keys.SUBMIT_TIME)) %><br/>
 <b>Finished At : </b>  <%=StringUtils.getFormattedTimeWithDiff(dateFormat, 
job.getLong(Keys.FINISH_TIME), job.getLong(Keys.LAUNCH_TIME)) %><br/>
 <b>Status : </b> <%= ((job.get(Keys.JOB_STATUS) == null)?"Incomplete" 
:job.get(Keys.JOB_STATUS)) %><br/> 
-<b><a href="analysejobhistory.jsp?jobid=<%=jobid 
%>&jobTrackerId=<%=jobTrackerId %>">Analyse This Job</a></b> 
-<hr/>
-<center>
 <%
        Map<String, JobHistory.Task> tasks = job.getAllTasks();
        int totalMaps = 0 ; 
        int totalReduces = 0; 
        int failedMaps = 0; 
+       int killedMaps = 0;
        int failedReduces = 0 ; 
+       int killedReduces = 0;
        
        long mapStarted = 0 ; 
        long mapFinished = 0 ; 
        long reduceStarted = 0 ; 
        long reduceFinished = 0; 
+        
+        Map <String,String> allHosts = new TreeMap<String,String>();
        
        for( JobHistory.Task task : tasks.values() ) {
          
          long startTime = task.getLong(Keys.START_TIME) ; 
          long finishTime = task.getLong(Keys.FINISH_TIME) ; 
          
+          allHosts.put(task.get(Keys.HOSTNAME), null);
+
          if( Values.MAP.name().equals(task.get(Keys.TASK_TYPE)) ){
            if( mapStarted==0 || mapStarted > startTime ){
              mapStarted = startTime; 
@@ -63,6 +66,9 @@
                if( Values.FAILED.name().equals(attempt.get(Keys.TASK_STATUS)) 
) {
                    failedMaps++; 
                }
+               if( Values.KILLED.name().equals(attempt.get(Keys.TASK_STATUS)) 
) {
+                   killedMaps++; 
+               }
            }
          }else{
            if( reduceStarted==0||reduceStarted > startTime ){
@@ -77,13 +83,20 @@
                if( Values.FAILED.name().equals(attempt.get(Keys.TASK_STATUS)) 
) {
                    failedReduces++; 
                }
+               if( Values.KILLED.name().equals(attempt.get(Keys.TASK_STATUS)) 
) {
+                   killedReduces++; 
+               }
            }
          }
        }
 %>
+<b>Number of nodes used: </b> <%=allHosts.size() %><br/>
+<b><a href="analysejobhistory.jsp?jobid=<%=jobid 
%>&jobTrackerId=<%=jobTrackerId %>">Analyse This Job</a></b> 
+<hr/>
+<center>
 <table border="2" cellpadding="5" cellspacing="2">
 <tr>
-<td>Kind</td><td>Total Tasks</td><td>Finished tasks</td><td>Failed 
tasks</td><td>Start Time</td><td>Finish Time</td>
+<td>Kind</td><td>Total Tasks(successful+failed+killed)</td><td>Successful 
tasks</td><td>Failed tasks</td><td>Killed tasks</td><td>Start 
Time</td><td>Finish Time</td>
 </tr>
 <tr>
 <td>Map</td>
@@ -93,6 +106,8 @@
          <%=job.getInt(Keys.FINISHED_MAPS) %></a></td>
        <td><a href="jobtaskshistory.jsp?jobid=<%=jobid 
%>&jobTrackerId=<%=jobTrackerId %>&taskType=<%=Values.MAP.name() 
%>&status=<%=Values.FAILED %>">
          <%=failedMaps %></a></td>
+       <td><a href="jobtaskshistory.jsp?jobid=<%=jobid 
%>&jobTrackerId=<%=jobTrackerId %>&taskType=<%=Values.MAP.name() 
%>&status=<%=Values.KILLED %>">
+         <%=killedMaps %></a></td>
        <td><%=StringUtils.getFormattedTimeWithDiff(dateFormat, mapStarted, 0) 
%></td>
        <td><%=StringUtils.getFormattedTimeWithDiff(dateFormat, mapFinished, 
mapStarted) %></td>
 </tr>
@@ -104,6 +119,8 @@
          <%=job.getInt(Keys.FINISHED_REDUCES)%></a></td>
        <td><a href="jobtaskshistory.jsp?jobid=<%=jobid 
%>&jobTrackerId=<%=jobTrackerId %>&taskType=<%=Values.REDUCE.name() 
%>&status=<%=Values.FAILED %>">
          <%=failedReduces%></a></td>
+       <td><a href="jobtaskshistory.jsp?jobid=<%=jobid 
%>&jobTrackerId=<%=jobTrackerId %>&taskType=<%=Values.REDUCE.name() 
%>&status=<%=Values.KILLED %>">
+         <%=killedReduces%></a></td>  
        <td><%=StringUtils.getFormattedTimeWithDiff(dateFormat, reduceStarted, 
0) %></td>
        <td><%=StringUtils.getFormattedTimeWithDiff(dateFormat, reduceFinished, 
reduceStarted) %></td>
 </tr>
@@ -111,7 +128,7 @@
 
 <br/>
  <%
-       DefaultJobHistoryParser.BadNodesFilter filter = new 
DefaultJobHistoryParser.BadNodesFilter();
+       DefaultJobHistoryParser.FailedOnNodesFilter filter = new 
DefaultJobHistoryParser.FailedOnNodesFilter();
        String dir = System.getProperty("hadoop.log.dir") + File.separator + 
"history" ; 
  
        JobHistory.parseHistory(new File(dir, jobTrackerId+"_" + jobid), 
filter); 
@@ -143,6 +160,40 @@
        }
  %>
 </table>
+<br/>
+ <%
+       DefaultJobHistoryParser.KilledOnNodesFilter killedFilter = new 
DefaultJobHistoryParser.KilledOnNodesFilter();
+       dir = System.getProperty("hadoop.log.dir") + File.separator + "history" 
; 
+ 
+       JobHistory.parseHistory(new File(dir, jobTrackerId+"_" + jobid), 
killedFilter); 
+       badNodes = killedFilter.getValues(); 
+       if( badNodes.size() > 0 ) {
+ %>
+<h3>Killed tasks attempts by nodes </h3>
+<table border="1">
+<tr><td>Hostname</td><td>Killed Tasks</td></tr>
+ <%      
+  for (Map.Entry<String, Set<String>> entry : badNodes.entrySet()) {
+    String node = entry.getKey();
+    Set<String> killedTasks = entry.getValue();
+%>
+       <tr>
+               <td><%=node %></td>
+               <td>
+<%
+               for( String t : killedTasks ) {
+%>
+                <a 
href="taskdetailshistory.jsp?jobid=<%=jobid%>&jobTrackerId=<%=jobTrackerId 
%>&taskid=<%=t %>"><%=t %></a>,&nbsp;
+<%               
+               }
+%>     
+               </td>
+       </tr>
+<%       
+     }
+       }
+ %>
+</table>
  </center>
 
-</body></html>
\ No newline at end of file
+</body></html>

Modified: lucene/hadoop/trunk/src/webapps/job/jobtaskshistory.jsp
URL: 
http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/webapps/job/jobtaskshistory.jsp?view=diff&rev=555114&r1=555113&r2=555114
==============================================================================
--- lucene/hadoop/trunk/src/webapps/job/jobtaskshistory.jsp (original)
+++ lucene/hadoop/trunk/src/webapps/job/jobtaskshistory.jsp Tue Jul 10 16:30:41 
2007
@@ -33,9 +33,12 @@
 <%
        for( JobHistory.Task task : tasks.values() ) {
          if( taskType.equals(task.get(Keys.TASK_TYPE) ) ){
-           if( taskStatus.equals(task.get(Keys.TASK_STATUS)) || 
taskStatus.equals("all")){
-              printTask(jobid, jobTrackerId, task, out); 
-           }
+            Map <String, TaskAttempt> taskAttempts = task.getTaskAttempts();
+            for (JobHistory.TaskAttempt taskAttempt : taskAttempts.values()) {
+             if( taskStatus.equals(taskAttempt.get(Keys.TASK_STATUS)) || 
taskStatus.equals("all")){
+                printTask(jobid, jobTrackerId, task, out); 
+             }
+            }
          }
        }
 %>


Reply via email to