Author: cutting
Date: Tue Jun 20 11:52:07 2006
New Revision: 415765

URL: http://svn.apache.org/viewvc?rev=415765&view=rev
Log:
HADOOP-271. Add links from jobtracker's web ui to tasktracker's web ui. Also attempt to log a thread dump of child processes before they're killed.

Modified: lucene/hadoop/trunk/CHANGES.txt lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobInProgress.java lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskStatus.java lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java lucene/hadoop/trunk/src/webapps/job/jobfailures.jsp lucene/hadoop/trunk/src/webapps/job/taskdetails.jsp Modified: lucene/hadoop/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=415765&r1=415764&r2=415765&view=diff ============================================================================== --- lucene/hadoop/trunk/CHANGES.txt (original) +++ lucene/hadoop/trunk/CHANGES.txt Tue Jun 20 11:52:07 2006 @@ -16,6 +16,10 @@ tasktracker, greatly increasing code coverage. (Milind Bhandarkar via cutting) + 5. HADOOP-271. Add links from jobtracker's web ui to tasktracker's + web ui. Also attempt to log a thread dump of child processes + before they're killed. 
(omalley via cutting) + Release 0.3.2 - 2006-06-09 Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobInProgress.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobInProgress.java?rev=415765&r1=415764&r2=415765&view=diff ============================================================================== --- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobInProgress.java (original) +++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobInProgress.java Tue Jun 20 11:52:07 2006 @@ -236,6 +236,14 @@ } /** + * Get the job configuration + * @return the job's configuration + */ + JobConf getJobConf() { + return conf; + } + + /** * Return a treeset of completed TaskInProgress objects */ public Vector reportTasksInProgress(boolean shouldBeMap, boolean shouldBeComplete) { @@ -604,7 +612,7 @@ TaskStatus.FAILED, reason, reason, - hostname); + trackerName); failedTask(tip, taskid, status, trackerName); } Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java?rev=415765&r1=415764&r2=415765&view=diff ============================================================================== --- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java (original) +++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java Tue Jun 20 11:52:07 2006 @@ -128,8 +128,11 @@ } if (tip != null) { JobInProgress job = tip.getJob(); + String trackerName = getAssignedTracker(taskId); + TaskTrackerStatus trackerStatus = + getTaskTracker(trackerName); job.failedTask(tip, taskId, "Error launching task", - "n/a", "n/a"); + trackerStatus.getHost(), trackerName); } itr.remove(); } else { @@ -1029,6 +1032,15 @@ return tip.getTaskStatuses(); } + /** + * Get tracker name for a given task id. 
+ * @param taskId the name of the task + * @return The name of the task tracker + */ + public synchronized String getAssignedTracker(String taskId) { + return (String) taskidToTrackerMap.get(taskId); + } + /////////////////////////////////////////////////////////////// // JobTracker methods /////////////////////////////////////////////////////////////// @@ -1055,7 +1067,7 @@ void updateTaskStatuses(TaskTrackerStatus status) { for (Iterator it = status.taskReports(); it.hasNext(); ) { TaskStatus report = (TaskStatus) it.next(); - report.setHostname(status.getHost()); + report.setTaskTracker(status.getTrackerName()); String taskId = report.getTaskId(); TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(taskId); if (tip == null) { Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskStatus.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskStatus.java?rev=415765&r1=415764&r2=415765&view=diff ============================================================================== --- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskStatus.java (original) +++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskStatus.java Tue Jun 20 11:52:07 2006 @@ -37,20 +37,20 @@ private int runState; private String diagnosticInfo; private String stateString; - private String hostname; + private String taskTracker; public TaskStatus() {} public TaskStatus(String taskid, boolean isMap, float progress, int runState, String diagnosticInfo, - String stateString, String hostname) { + String stateString, String taskTracker) { this.taskid = taskid; this.isMap = isMap; this.progress = progress; this.runState = runState; this.diagnosticInfo = diagnosticInfo; this.stateString = stateString; - this.hostname = hostname; + this.taskTracker = taskTracker; } public String getTaskId() { return taskid; } @@ -58,8 +58,8 @@ public float getProgress() { return progress; } public void setProgress(float progress) { this.progress = 
progress; } public int getRunState() { return runState; } - public String getHostname() {return hostname;} - public void setHostname(String host) { this.hostname = host;} + public String getTaskTracker() {return taskTracker;} + public void setTaskTracker(String tracker) { this.taskTracker = tracker;} public void setRunState(int runState) { this.runState = runState; } public String getDiagnosticInfo() { return diagnosticInfo; } public void setDiagnosticInfo(String info) { this.diagnosticInfo = info; } Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java?rev=415765&r1=415764&r2=415765&view=diff ============================================================================== --- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java (original) +++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java Tue Jun 20 11:52:07 2006 @@ -250,6 +250,28 @@ } /** + * Are we running under killall-less operating system. + */ + private static boolean isWindows = + System.getProperty("os.name").startsWith("Windows"); + + /** + * Get the call stacks for all java processes on this system. + * Obviously, this is only useful for debugging. + */ + private static void getCallStacks() { + if (LOG.isDebugEnabled() && !isWindows) { + try { + Process proc = + Runtime.getRuntime().exec("killall -QUIT java"); + proc.waitFor(); + } catch (IOException ie) { + LOG.warn(StringUtils.stringifyException(ie)); + } catch (InterruptedException ie) {} + } + } + + /** * Main service loop. Will stay in this loop forever. */ int offerService() throws Exception { @@ -341,6 +363,7 @@ (timeSinceLastReport / 1000) + " seconds. 
Killing."; LOG.info(tip.getTask().getTaskId() + ": " + msg); + getCallStacks(); tip.reportDiagnosticInfo(msg); try { tip.killAndCleanup(true); @@ -582,7 +605,13 @@ /** */ public synchronized TaskStatus createStatus() { - TaskStatus status = new TaskStatus(task.getTaskId(), task.isMapTask(), progress, runstate, diagnosticInfo.toString(), (stateString == null) ? "" : stateString, ""); + TaskStatus status = + new TaskStatus(task.getTaskId(), + task.isMapTask(), + progress, runstate, + diagnosticInfo.toString(), + (stateString == null) ? "" : stateString, + getName()); if (diagnosticInfo.length() > 0) { diagnosticInfo = new StringBuffer(); } @@ -902,6 +931,7 @@ LOG.info("Ping exception: " + msg); remainingRetries -=1; if (remainingRetries == 0) { + getCallStacks(); LOG.warn("Last retry, killing "+taskid); System.exit(65); } Modified: lucene/hadoop/trunk/src/webapps/job/jobfailures.jsp URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/webapps/job/jobfailures.jsp?rev=415765&r1=415764&r2=415765&view=diff ============================================================================== --- lucene/hadoop/trunk/src/webapps/job/jobfailures.jsp (original) +++ lucene/hadoop/trunk/src/webapps/job/jobfailures.jsp Tue Jun 20 11:52:07 2006 @@ -20,12 +20,20 @@ String tipId = tip.getTIPId(); for(int i=0; i < statuses.length; ++i) { if (statuses[i].getRunState() == TaskStatus.FAILED) { + String taskTrackerName = statuses[i].getTaskTracker(); + TaskTrackerStatus taskTracker = tracker.getTaskTracker(taskTrackerName); out.print("<tr><td>" + statuses[i].getTaskId() + "</td><td><a href=\"/taskdetails.jsp?jobid="+ jobId + "&taskid=" + tipId + "\">" + tipId + - "</a></td><td>" + statuses[i].getHostname() + - "</td><td>" + statuses[i].getDiagnosticInfo() + - "</td></tr>\n"); + "</a></td>"); + if (taskTracker == null) { + out.print("<td>" + taskTrackerName + "</td>"); + } else { + out.print("<td><a href=\"http://" + taskTracker.getHost() + ":" + + taskTracker.getHttpPort() + "\">" + 
taskTracker.getHost() + + "</a></td>"); + } + out.print("<td>" + statuses[i].getDiagnosticInfo() + "</td></tr>\n"); } } } Modified: lucene/hadoop/trunk/src/webapps/job/taskdetails.jsp URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/webapps/job/taskdetails.jsp?rev=415765&r1=415764&r2=415765&view=diff ============================================================================== --- lucene/hadoop/trunk/src/webapps/job/taskdetails.jsp (original) +++ lucene/hadoop/trunk/src/webapps/job/taskdetails.jsp Tue Jun 20 11:52:07 2006 @@ -52,8 +52,16 @@ <% for (int i = 0; i < ts.length; i++) { TaskStatus status = ts[i]; + String taskTrackerName = status.getTaskTracker(); + TaskTrackerStatus taskTracker = tracker.getTaskTracker(taskTrackerName); out.print("<tr><td>" + status.getTaskId() + "</td>"); - out.print("<td>" + status.getHostname() + "</td>"); + if (taskTracker == null) { + out.print("<td>" + taskTrackerName + "</td>"); + } else { + out.print("<td><a href=\"http://" + taskTracker.getHost() + ":" + + taskTracker.getHttpPort() + "\">" + taskTracker.getHost() + + "</a></td>"); + } out.print("<td>"); writeString(out, status.getRunState()); out.print("</td>");