Added: hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/JobStatisticsInterface.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/JobStatisticsInterface.java?rev=724531&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/JobStatisticsInterface.java (added)
+++ hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/JobStatisticsInterface.java Mon Dec  8 14:45:38 2008
@@ -0,0 +1,124 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.vaidya.statistics.job;
+
+import java.util.ArrayList;
+
+import org.apache.hadoop.mapred.JobConf;
+
+public interface JobStatisticsInterface {
+  
+  /**
+   * Get job configuration (job.xml) values
+   */
+  public JobConf getJobConf();
+  
+  /*
+   * Get Job Counters of type long
+   */
+  public long getLongValue(Enum key);
+  
+  /*
+   * Get job Counters of type Double
+   */
+  public double getDoubleValue(Enum key);
+  
+  /* 
+   * Get Job Counters of type String
+   */
+  public String getStringValue(Enum key);
+  
+  /*
+   * Set key value of type long
+   */
+  public void setValue(Enum key, long value);
+  
+  /*
+   * Set key value of type double
+   */
+  public void setValue(Enum key, double value);
+  
+  /*
+   * Set key value of type String
+   */
+  public void setValue(Enum key, String value);
+  
+  /**
+   * @return mapTaskList : ArrayList of MapTaskStatistics
+   * @param mapTaskSortKey : Specific counter key used for sorting the task list
+   * @param dataType : indicates the data type of the counter key used for sorting.
+   * If sort key is null then by default map tasks are sorted using map task ids.
+   */
+  public ArrayList<MapTaskStatistics> getMapTaskList(Enum mapTaskSortKey, KeyDataType dataType);
+  
+  /**
+   * @return reduceTaskList : ArrayList of ReduceTaskStatistics
+   * @param reduceTaskSortKey : Specific counter key used for sorting the task list
+   * @param dataType : indicates the data type of the counter key used for sorting.
+   * If sort key is null then by default reduce tasks are sorted using task ids.
+   */
+  public ArrayList<ReduceTaskStatistics> getReduceTaskList(Enum reduceTaskSortKey, KeyDataType dataType);
+  
+  
+  /*
+   * Print the Job Execution Statistics
+   */
+  public void printJobExecutionStatistics();
+  
+  
+  /*
+   * Job and Task statistics Key data types
+   */
+  public static enum KeyDataType {
+    STRING, LONG, DOUBLE
+  }
+  
+  /**
+   * Job Keys
+   */
+  public static enum JobKeys {
+    JOBTRACKERID, JOBID, JOBNAME, USER, SUBMIT_TIME, CONF_PATH, LAUNCH_TIME, TOTAL_MAPS, TOTAL_REDUCES,
+    STATUS, FINISH_TIME, FINISHED_MAPS, FINISHED_REDUCES, FAILED_MAPS, FAILED_REDUCES,
+    LAUNCHED_MAPS, LAUNCHED_REDUCES, RACKLOCAL_MAPS, DATALOCAL_MAPS, HDFS_BYTES_READ,
+    HDFS_BYTES_WRITTEN, LOCAL_BYTES_READ, LOCAL_BYTES_WRITTEN, COMBINE_OUTPUT_RECORDS,
+    COMBINE_INPUT_RECORDS, REDUCE_INPUT_GROUPS, REDUCE_INPUT_RECORDS, REDUCE_OUTPUT_RECORDS,
+    MAP_INPUT_RECORDS, MAP_OUTPUT_RECORDS, MAP_INPUT_BYTES, MAP_OUTPUT_BYTES, MAP_HDFS_BYTES_WRITTEN,
+    JOBCONF
+  }
+  
+  /**
+   * Map Task Keys
+   */
+  public static enum MapTaskKeys {
+    TASK_ID, TASK_TYPE, START_TIME, STATUS, FINISH_TIME, HDFS_BYTES_READ, HDFS_BYTES_WRITTEN,
+    LOCAL_BYTES_READ, LOCAL_BYTES_WRITTEN, COMBINE_OUTPUT_RECORDS, COMBINE_INPUT_RECORDS,
+    OUTPUT_RECORDS, INPUT_RECORDS, INPUT_BYTES, OUTPUT_BYTES, NUM_ATTEMPTS, ATTEMPT_ID,
+    HOSTNAME, SPLITS
+  }
+  
+  /**
+   * Reduce Task Keys
+   */
+  public static enum ReduceTaskKeys {
+    
+    TASK_ID, TASK_TYPE, START_TIME, STATUS, FINISH_TIME, HDFS_BYTES_READ, HDFS_BYTES_WRITTEN,
+    LOCAL_BYTES_READ, LOCAL_BYTES_WRITTEN, COMBINE_OUTPUT_RECORDS, COMBINE_INPUT_RECORDS,
+    OUTPUT_RECORDS, INPUT_RECORDS, NUM_ATTEMPTS, ATTEMPT_ID, HOSTNAME, SHUFFLE_FINISH_TIME,
+    SORT_FINISH_TIME, INPUT_GROUPS
+  }
+}
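
Illustrative usage of the interface above (a minimal sketch, not part of this commit; the summarize() helper is made up, and jobStats stands for any concrete implementation of JobStatisticsInterface):

    import java.util.ArrayList;

    import org.apache.hadoop.vaidya.statistics.job.JobStatisticsInterface;
    import org.apache.hadoop.vaidya.statistics.job.JobStatisticsInterface.JobKeys;
    import org.apache.hadoop.vaidya.statistics.job.JobStatisticsInterface.KeyDataType;
    import org.apache.hadoop.vaidya.statistics.job.JobStatisticsInterface.MapTaskKeys;
    import org.apache.hadoop.vaidya.statistics.job.MapTaskStatistics;

    public class JobStatsSketch {
      // Hypothetical helper: jobStats is any concrete JobStatisticsInterface
      static void summarize(JobStatisticsInterface jobStats) {
        // Read a job-level aggregate counter
        long totalMaps = jobStats.getLongValue(JobKeys.TOTAL_MAPS);
        System.out.println("Total maps: " + totalMaps);

        // Fetch the map tasks sorted by their INPUT_BYTES counter (a LONG key)
        ArrayList<MapTaskStatistics> maps =
            jobStats.getMapTaskList(MapTaskKeys.INPUT_BYTES, KeyDataType.LONG);
        for (MapTaskStatistics m : maps) {
          System.out.println(m.getStringValue(MapTaskKeys.TASK_ID)
              + " read " + m.getLongValue(MapTaskKeys.INPUT_BYTES) + " bytes");
        }
      }
    }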

Added: hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/MapTaskStatistics.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/MapTaskStatistics.java?rev=724531&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/MapTaskStatistics.java (added)
+++ hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/MapTaskStatistics.java Mon Dec  8 14:45:38 2008
@@ -0,0 +1,25 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.vaidya.statistics.job;
+
+/*
+ * Map task statistics extends TaskStatistics
+ */
+public class MapTaskStatistics extends TaskStatistics {
+  
+}

Added: hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/ReduceTaskStatistics.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/ReduceTaskStatistics.java?rev=724531&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/ReduceTaskStatistics.java (added)
+++ hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/ReduceTaskStatistics.java Mon Dec  8 14:45:38 2008
@@ -0,0 +1,25 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.vaidya.statistics.job;
+
+/*
+ * Reduce task statistics extends TaskStatistics
+ */
+public class ReduceTaskStatistics extends TaskStatistics {
+  
+}

Added: hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/TaskStatistics.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/TaskStatistics.java?rev=724531&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/TaskStatistics.java (added)
+++ hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/TaskStatistics.java Mon Dec  8 14:45:38 2008
@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.vaidya.statistics.job;
+
+import java.util.Hashtable;
+import java.util.Map;
+
+/**
+ * Base class that holds the statistics of a single task as key/value pairs.
+ */
+public class TaskStatistics {
+  
+  /*
+   * Stores task statistics as Enum/String key,value pairs.
+   */
+  private Hashtable<Enum, String>  _task = new Hashtable<Enum, String>();
+  
+  /*
+   * Get Long key value
+   */
+  public long getLongValue(Enum key) {
+    return Long.parseLong(this._task.get(key));
+  }
+  
+  /*
+   * Get double key value
+   */
+  public double getDoubleValue(Enum key) {
+    return Double.parseDouble(this._task.get(key));
+  }
+  
+  /*
+   * Get String key value
+   */
+  public String getStringValue(Enum key) {
+    return this._task.get(key);
+  }
+  
+  /*
+   * Set long key value 
+   */
+  public void setValue(Enum key, long value) {
+    this._task.put(key, Long.toString(value));
+  }
+  
+  /*
+   * Set double key value
+   */
+  public void setValue(Enum key, double value) {
+    this._task.put(key, Double.toString(value));
+  }
+  
+  /*
+   * Set String key value
+   */
+  public void setValue(Enum key, String value) {
+    this._task.put(key, value);
+  }
+  
+  /*
+   * Print the key/values pairs for a task 
+   */
+  public void printKeys() {
+    // Print each stored key/value pair for this task
+    for (Map.Entry<Enum, String> entry : this._task.entrySet()) {
+      System.out.println("Key:<" + entry.getKey().name() + ">, value:<" + entry.getValue() + ">");
+    }
+  }
+}
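
Illustrative usage of the class above (a minimal sketch, not part of this commit; the task ID and byte count are made-up values):

    import org.apache.hadoop.vaidya.statistics.job.JobStatisticsInterface.MapTaskKeys;
    import org.apache.hadoop.vaidya.statistics.job.MapTaskStatistics;

    public class TaskStatsSketch {
      public static void main(String[] args) {
        MapTaskStatistics task = new MapTaskStatistics();

        // Values are stored internally as strings, keyed by enum
        task.setValue(MapTaskKeys.TASK_ID, "task_200812081445_0001_m_000000");
        task.setValue(MapTaskKeys.INPUT_BYTES, 67108864L);

        // Typed getters parse the stored strings back
        long inputBytes = task.getLongValue(MapTaskKeys.INPUT_BYTES);
        System.out.println("input bytes = " + inputBytes);

        // Dump every stored key/value pair
        task.printKeys();
      }
    }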

Added: hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/util/XMLUtils.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/util/XMLUtils.java?rev=724531&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/util/XMLUtils.java (added)
+++ hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/util/XMLUtils.java Mon Dec  8 14:45:38 2008
@@ -0,0 +1,237 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.vaidya.util;
+
+import java.io.IOException;
+import java.io.File;
+import java.io.InputStream;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.transform.TransformerConfigurationException;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.Source;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.Result;
+import javax.xml.transform.stream.StreamResult;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerFactory;
+
+import org.xml.sax.SAXParseException;
+import org.xml.sax.SAXException;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+
+
+/**
+ * Sample Utility class to work with DOM document
+ */
+public class XMLUtils {
+
+  /** Prints the specified node, then prints all of its children. */
+
+  public static void printDOM(Node node) {
+
+    int type = node.getNodeType();
+
+    switch (type) {
+
+      // print the document element
+      case Node.DOCUMENT_NODE: {
+        System.out.print("<?xml version=\"1.0\" ?>");
+        printDOM(((Document)node).getDocumentElement());
+        break;
+      }
+
+      // print element with attributes
+      case Node.ELEMENT_NODE: {
+        System.out.println();
+        System.out.print("<");
+        System.out.print(node.getNodeName());
+        NamedNodeMap attrs = node.getAttributes();
+        for (int i = 0; i < attrs.getLength(); i++) {
+          Node attr = attrs.item(i);
+          System.out.print(" " + attr.getNodeName().trim() +
+                           "=\"" + attr.getNodeValue().trim() +
+                           "\"");
+        }
+        System.out.print(">");
+        NodeList children = node.getChildNodes();
+
+        if (children != null) {
+          int len = children.getLength();
+          for (int i = 0; i < len; i++)
+            printDOM(children.item(i));
+        }
+        break;
+      }
+
+      // handle entity reference nodes
+
+      case Node.ENTITY_REFERENCE_NODE: {
+        System.out.print("&");
+        System.out.print(node.getNodeName().trim());
+        System.out.print(";");
+        break;
+      }
+
+      // print cdata sections
+      case Node.CDATA_SECTION_NODE: {
+        System.out.print("<![CDATA[");
+        System.out.print(node.getNodeValue().trim());
+        System.out.print("]]>");
+        break;
+      }
+
+      // print text
+      case Node.TEXT_NODE: {
+        System.out.println();
+        System.out.print(node.getNodeValue().trim());
+        break;
+      }
+
+      // print processing instruction
+
+      case Node.PROCESSING_INSTRUCTION_NODE: {
+        System.out.print("<?");
+        System.out.print(node.getNodeName().trim());
+        String data = node.getNodeValue().trim();
+        System.out.print(" ");
+        System.out.print(data);
+        System.out.print("?>");
+        break;
+      }
+    }
+
+    if (type == Node.ELEMENT_NODE) {
+      System.out.println();
+      System.out.print("</");
+      System.out.print(node.getNodeName().trim());
+      System.out.print('>');
+    }
+  }
+
+  /*
+   * Get the value of the first (or only) element given its node name
+   */
+  public static String getElementValue(String elementName, Element element) throws Exception {
+    NodeList childNodes = element.getElementsByTagName(elementName);
+    Element cn = (Element) childNodes.item(0);
+    // Guard before dereferencing: a missing element would otherwise cause a
+    // NullPointerException instead of this informative exception
+    if (cn == null || cn.getFirstChild() == null) {
+      throw new Exception("No element found with given name:" + elementName);
+    }
+    return cn.getFirstChild().getNodeValue().trim();
+  }
+
+  /**
+   * Parse the XML input stream and create a Document
+   * @param fs the input stream to parse
+   * @return Document
+   */
+  public static Document parse(InputStream fs) {
+    Document document = null;
+    // Initiate DocumentBuilderFactory
+    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+
+    // Use a non-validating parser
+    factory.setValidating(false);
+
+    // Use one that understands namespaces
+    factory.setNamespaceAware(true);
+    try {
+      // Get DocumentBuilder
+      DocumentBuilder builder = factory.newDocumentBuilder();
+
+      // Parse the stream and load the Document into memory
+      document = builder.parse(fs);
+      return document;
+    } catch (SAXParseException spe) {
+      // Error generated by the parser
+      System.out.println("\n** Parsing error , line " + spe.getLineNumber()
+                         + ", uri " + spe.getSystemId());
+      System.out.println(" " + spe.getMessage() );
+      // Use the contained exception, if any
+      Exception x = spe;
+      if (spe.getException() != null)
+        x = spe.getException();
+      x.printStackTrace();
+    } catch (SAXException sxe) {
+      // Error generated during parsing
+      Exception x = sxe;
+      if (sxe.getException() != null)
+        x = sxe.getException();
+      x.printStackTrace();
+    } catch (ParserConfigurationException pce) {
+      // Parser with specified options can't be built
+      pce.printStackTrace();
+    } catch (IOException ioe) {
+      // I/O error
+      ioe.printStackTrace();
+    }
+    
+    return null;
+  }
+
+  /**
+   * This method writes a DOM document to a file
+   * @param filename
+   * @param document
+   */
+  public static void writeXmlToFile(String filename, Document document) {
+    try {
+      // Prepare the DOM document for writing
+      Source source = new DOMSource(document);
+      
+      // Prepare the output file
+      File file = new File(filename);
+      Result result = new StreamResult(file);
+
+      // Write the DOM document to the file
+      // Get Transformer
+      Transformer xformer = TransformerFactory.newInstance().newTransformer();
+
+      // Write to a file
+      xformer.transform(source, result);
+
+    } catch (TransformerConfigurationException e) {
+      System.out.println("TransformerConfigurationException: " + e);
+    } catch (TransformerException e) {
+      System.out.println("TransformerException: " + e);
+    }
+  }
+
+  /**
+   * Count Elements in Document by Tag Name
+   * @param tag
+   * @param document
+   * @return number of elements with the given tag name
+   */
+  public static int countByTagName(String tag, Document document){
+    NodeList list = document.getElementsByTagName(tag);
+    return list.getLength();
+  }
+}
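
Illustrative usage of these utilities together (a minimal sketch, not part of this commit; the file names are made up, and parse() returns null on a parse error):

    import java.io.FileInputStream;
    import java.io.InputStream;

    import org.apache.hadoop.vaidya.util.XMLUtils;
    import org.w3c.dom.Document;

    public class XMLUtilsSketch {
      public static void main(String[] args) throws Exception {
        // Parse a test description file into a DOM Document
        InputStream in = new FileInputStream("postex_diagnosis_tests.xml");
        Document doc = XMLUtils.parse(in);
        in.close();
        if (doc == null) {
          return; // parse() already printed the error
        }

        // Count the DiagnosticTest elements and dump the tree to stdout
        System.out.println("tests: " + XMLUtils.countByTagName("DiagnosticTest", doc));
        XMLUtils.printDOM(doc);

        // Write the document back out to a file
        XMLUtils.writeXmlToFile("copy_of_tests.xml", doc);
      }
    }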

Added: hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/vaidya.sh
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/vaidya.sh?rev=724531&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/vaidya.sh (added)
+++ hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/vaidya.sh Mon Dec  8 14:45:38 2008
@@ -0,0 +1,47 @@
+#!/bin/sh
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+this="$0"
+while [ -h "$this" ]; do
+  ls=`ls -ld "$this"`
+  link=`expr "$ls" : '.*-> \(.*\)$'`
+  if expr "$link" : '.*/.*' > /dev/null; then
+    this="$link"
+  else
+    this=`dirname "$this"`/"$link"
+  fi
+done
+
+# convert relative path to absolute path
+bin=`dirname "$this"`
+script=`basename "$this"`
+bin=`cd "$bin"; pwd`
+this="$bin/$script"
+
+# Check if HADOOP_HOME and JAVA_HOME are set.
+if [ -z "$HADOOP_HOME" ] ; then
+  echo "HADOOP_HOME environment variable not defined"
+  exit -1;
+fi
+
+if [ -z "$JAVA_HOME" ] ; then
+  echo "JAVA_HOME environment variable not defined"
+  exit -1;
+fi
+
+hadoopVersion=`$HADOOP_HOME/bin/hadoop version | awk 'BEGIN { RS = "" ; FS = "\n" } ; { print $1 }' | awk '{print $2}'`
+
+$JAVA_HOME/bin/java -classpath $HADOOP_HOME/hadoop-${hadoopVersion}-core.jar:$HADOOP_HOME/contrib/vaidya/hadoop-${hadoopVersion}-vaidya.jar:$HADOOP_HOME/lib/commons-logging-1.0.4.jar:${CLASSPATH} org.apache.hadoop.vaidya.postexdiagnosis.PostExPerformanceDiagnoser "$@"

Modified: hadoop/core/trunk/src/docs/src/documentation/content/xdocs/site.xml
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/docs/src/documentation/content/xdocs/site.xml?rev=724531&r1=724530&r2=724531&view=diff
==============================================================================
--- hadoop/core/trunk/src/docs/src/documentation/content/xdocs/site.xml (original)
+++ hadoop/core/trunk/src/docs/src/documentation/content/xdocs/site.xml Mon Dec  8 14:45:38 2008
@@ -52,6 +52,7 @@
     <hod-admin-guide label="HOD Admin Guide" href="hod_admin_guide.html"/>
     <hod-config-guide label="HOD Config Guide" href="hod_config_guide.html"/>
    <capacity_scheduler label="Capacity Scheduler" href="capacity_scheduler.html"/>
+    <vaidya    label="Hadoop Vaidya" href="vaidya.html"/>
     <api       label="API Docs"           href="ext:api/index" />
     <jdiff     label="API Changes"        href="ext:jdiff/changes" />
     <wiki      label="Wiki"               href="ext:wiki" />

Added: hadoop/core/trunk/src/docs/src/documentation/content/xdocs/vaidya.xml
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/docs/src/documentation/content/xdocs/vaidya.xml?rev=724531&view=auto
==============================================================================
--- hadoop/core/trunk/src/docs/src/documentation/content/xdocs/vaidya.xml (added)
+++ hadoop/core/trunk/src/docs/src/documentation/content/xdocs/vaidya.xml Mon Dec  8 14:45:38 2008
@@ -0,0 +1,171 @@
+<?xml version="1.0"?>
+<!--
+  Copyright 2002-2004 The Apache Software Foundation
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
+
+<document>
+  
+  <header>
+    <title>Hadoop Vaidya: A Performance Diagnostic Tool for Map-Reduce Jobs</title>
+  </header>
+  
+  <body>
+  
+    <section>
+      <title>Purpose</title>
+      
+      <p>This document describes various user-facing facets of the Hadoop Vaidya tool. It
+         describes both how to execute a default set of rules against your Map-Reduce job counters,
+         and how to write and execute new rules to detect specific performance problems.
+      </p>
+      <p>At present, a few sample test rules are provided with the tool, with the objective of growing the rules database over time.
+         You are welcome to contribute new rules for everyone's benefit; to do so, follow the
+         <a href="http://wiki.apache.org/hadoop/HowToContribute">procedure</a>
+         specified on the Apache Hadoop website.
+      </p>
+    </section>
+    
+    <section>
+      <title>Pre-requisites</title>
+      
+      <p>Ensure that Hadoop is installed and configured. More details:</p> 
+      <ul>
+        <li>
+          Make sure the HADOOP_HOME environment variable is set.
+        </li>
+        <li>
+          Make sure Java is installed and configured as a part of the Hadoop installation.
+        </li>
+      </ul>
+    </section>
+    
+    <section>
+      <title>Overview</title>
+      
+      <p>Hadoop Vaidya (Vaidya in Sanskrit means "one who knows", or "a physician")
+        is a rule-based performance diagnostic tool for
+        Map/Reduce jobs. It performs a post-execution analysis of a map/reduce
+        job by parsing and collecting execution statistics through the job history
+        and job configuration files. It runs a set of predefined tests/rules
+        against the job execution statistics to diagnose various performance problems.
+        Each test rule detects a specific performance problem with the Map/Reduce job and provides
+        targeted advice to the user. The tool generates an XML report based on
+        the evaluation results of the individual test rules.
+      </p>
+      
+    </section>
+  
+    <section>
+        <title>Terminology</title>
+        
+      <p>This section describes the main concepts and terminology involved with Hadoop Vaidya:</p>
+      <ul>
+        <li><em>PostExPerformanceDiagnoser</em>: This class extends the base Diagnoser class and acts as a driver for post-execution performance analysis of Map/Reduce jobs.
+            It detects performance inefficiencies by executing a set of performance diagnosis rules against the job execution statistics.</li>
+        <li><em>Job Statistics</em>: This includes the job configuration information (job.xml) and the various counters logged by a Map/Reduce job as a part of the job history log
+            file. The counters are parsed and collected into the Job Statistics data structures, which contain global job-level aggregate counters and
+            a set of counters for each Map and Reduce task.</li>
+        <li><em>Diagnostic Test/Rule</em>: This is the program logic that detects an inefficiency of an M/R job based on the job statistics. The
+            description of the test is specified as an XML element (DiagnosticTest) in a test description file, e.g. the
+            default tests description file, <em>$HADOOP_HOME/contrib/vaidya/conf/postex_diagnosis_tests.xml</em>. The actual logic is coded as
+            a Java class and referenced in the DiagnosticTest XML element.</li>
+      </ul>
+      <p></p>
+      <p>The following section describes the <em>DiagnosticTest</em> XML element in a diagnostic test description file:</p>
+      <ul>
+        <li><em>DiagnosticTest{Title}</em>: Specifies a short name/description of the test.</li>
+        <li><em>DiagnosticTest{ClassName}</em>: Specifies the fully qualified class name that implements the test logic.</li>
+        <li><em>DiagnosticTest{Description}</em>: Specifies a full description of the test rule.</li>
+        <li><em>DiagnosticTest{Importance}</em>: Specifies a declarative value for the overall importance of the test rule. (Values: High, Medium, Low)</li>
+        <li><em>DiagnosticTest{SuccessThreshold}</em>: This is a threshold value specified by the test case writer such that if the impact level of the test case
+            is lower, the test is declared PASSED (or NEGATIVE). The impact level is calculated and returned
+            by the individual test's evaluate function, specifying the degree of the problem the job has with respect to the condition being evaluated.</li>
+        <li><em>DiagnosticTest{Prescription}</em>: This is targeted advice written by the test case writer for the user to follow when the test is not PASSED.</li>
+        <li><em>DiagnosticTest{InputElement}</em>: This is a test-specific input that the test writer can optionally provide. It is supplied to the individual test case
+            class so that the test writer can use it within the test case. This is typically test configuration information, so that the test writer need not change the
+            Java code for the test case but rather can configure the test case using these input values.</li>
+      </ul>
+      <p></p>
+      <p>The following section describes the performance analysis report generated by the tool in XML format:</p>
+      <ul>
+        <li><em>PostExPerformanceDiagnosticReport</em>: This is the document (root) element of the XML report generated by the tool.</li>
+        <li><em>TestReportElement</em>: This is an XML report element in the test report document, one for each individual test specified in the test description
+            file.</li>
+        <li><em>TestReportElement{TestTitle}</em>: Will be included from DiagnosticTest{Title}.</li>
+        <li><em>TestReportElement{TestDescription}</em>: Will be included from DiagnosticTest{Description}.</li>
+        <li><em>TestReportElement{TestImportance}</em>: Will be included from DiagnosticTest{Importance}.</li>
+        <li><em>TestReportElement{TestSeverity}</em>: This is a product of the test impact level and the test importance. It indicates the overall severity of the test.</li>
+        <li><em>TestReportElement{ReferenceDetails}</em>: This is test-specific runtime information provided by the test case to support the test result and severity. Typically,
+            the test writer should print the test impact level in this section.</li>
+        <li><em>TestReportElement{TestResults}</em>: This is the boolean outcome of the test based on the SuccessThreshold specified by the test writer in the DiagnosticTest description.
+            PASSED (NEGATIVE) indicates no problem, while FAILED (POSITIVE) indicates a potential problem with the job for the given test case.</li>
+        <li><em>TestReportElement{TestPrescription}</em>: This will be included from DiagnosticTest{Prescription}, unless the test case writer overrides it in the test case class through the getPrescription()
+            method.</li>
+      </ul>
+    </section>
+
+    <section>
+      <title>How to Execute the Hadoop Vaidya Tool</title>
+
+      <p>The script to execute Hadoop Vaidya is in the <code>$HADOOP_HOME/contrib/vaidya/bin/</code> directory.
+         It comes with a default set of rules defined in the file
+         <code>$HADOOP_HOME/contrib/vaidya/conf/postex_diagnosis_tests.xml</code>.</p>
+      <ul>
+        <li>Make sure the HADOOP_HOME environment variable is set and Java is installed and configured.</li>
+        <li>Execute the Hadoop Vaidya script with -help (or without any arguments) to get the command line help, e.g.
+            <code>=>sh $HADOOP_HOME/contrib/vaidya/bin/vaidya.sh -help</code></li>
+        <li>The user needs to
+            supply the job's configuration file (<code>-jobconf job_conf.xml</code>), the job history log file (<code>-joblog job_history_log_file</code>), and optionally the test description
+            file (<code>-testconf postex_diagnosis_tests.xml</code>). If the test description file is not specified, the default one is picked up from the Hadoop Vaidya JAR (<code>$HADOOP_HOME/contrib/vaidya/hadoop-{version}-vaidya.jar</code>).
+            This default test description file is also available at the following location for users to make a local copy, modify, and add new test rules:
+            <code>$HADOOP_HOME/contrib/vaidya/conf/postex_diagnosis_tests.xml</code></li>
+        <li>Use the <code>-report report_file</code> option to store the XML report into the specified report_file.</li>
+      </ul>
+    </section>
+       
+    <section>
+      <title>How to Write and Execute your own Tests</title>
+      <p>Writing and executing your own test rules is not very hard. You can take a look at the Hadoop Vaidya source code for the existing set of tests.
+         The source code is at this <a href="http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/">hadoop svn repository location</a>.
+         The default set of tests is under the <code>"postexdiagnosis/tests/"</code> folder.</p>
+      <ul>
+        <li>A test class for your new test case should extend the <code>org.apache.hadoop.vaidya.DiagnosticTest</code> class and
+            override the following three methods from the base class:
+          <ul>
+            <li> evaluate() </li>
+            <li> getPrescription() </li>
+            <li> getReferenceDetails() </li>
+          </ul>
+        </li>
+        <li>Make a local copy of the <code>$HADOOP_HOME/contrib/vaidya/conf/postex_diagnosis_tests.xml</code> file or create a new test description XML file.</li>
+        <li>Add the test description element for your new test case to this test description file.</li>
+        <li>Compile your new test class (or multiple classes), archive them into a JAR file and add it to the CLASSPATH, e.g. (<code>export CLASSPATH=$CLASSPATH:newtests.jar</code>)</li>
+        <li>Execute the Hadoop Vaidya script with the job configuration, the job history log, and a reference to the newly created test description file using the <em>-testconf</em> option:
+            <code>=>sh $HADOOP_HOME/contrib/vaidya/bin/vaidya.sh -joblog job_history_log_file -jobconf job.xml -testconf new_test_description_file -report report.xml</code></li>
+      </ul>
+    </section>
+       
+    <p> </p>
+    <p> </p>
+    <p>
+      <em>Java and JNI are trademarks or registered trademarks of 
+      Sun Microsystems, Inc. in the United States and other countries.</em>
+    </p>
+    
+  </body>
+  
+</document>
\ No newline at end of file
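
Illustrative skeleton of a user-written rule as described in the vaidya.xml documentation above (a hedged sketch, not part of this commit: the DiagnosticTest base class is not included in this diff, so the evaluate(JobStatistics) signature and the impact-level convention are assumptions drawn from the documentation, and the package and class names are made up):

    package com.example.vaidya.tests; // hypothetical package for a user-supplied rule

    import org.apache.hadoop.vaidya.DiagnosticTest;
    import org.apache.hadoop.vaidya.statistics.job.JobStatistics;
    import org.apache.hadoop.vaidya.statistics.job.JobStatisticsInterface.JobKeys;

    /*
     * Flags jobs with a high fraction of failed reduces. Per the vaidya.xml
     * documentation, evaluate() returns an impact level that is compared
     * against the SuccessThreshold from the test description file; the exact
     * method signatures below are assumptions, not taken from this diff.
     */
    public class FailedReducesTest extends DiagnosticTest {

      @Override
      public double evaluate(JobStatistics job) {
        double failed = job.getLongValue(JobKeys.FAILED_REDUCES);
        double total = job.getLongValue(JobKeys.TOTAL_REDUCES);
        // Impact level in [0,1]: the fraction of reduces that failed
        return total == 0 ? 0.0 : failed / total;
      }

      @Override
      public String getPrescription() {
        return "Inspect the failed reduce task logs; check shuffle and disk capacity.";
      }

      @Override
      public String getReferenceDetails() {
        return "Impact level = FAILED_REDUCES / TOTAL_REDUCES.";
      }
    }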

