s...

lewismc Thu, 08 Jan 2015 22:35:31 -0800

Modified: 
nutch/branches/2.x/src/java/org/apache/nutch/util/EncodingDetector.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/EncodingDetector.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/EncodingDetector.java 
(original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/EncodingDetector.java Fri 
Jan  9 06:34:33 2015
@@ -35,27 +35,26 @@ import java.util.List;
 
 /**
  * A simple class for detecting character encodings.
- *
+ * 
  * <p>
  * Broadly this encompasses two functions, which are distinctly separate:
- *
+ * 
  * <ol>
- *  <li>Auto detecting a set of "clues" from input text.</li>
- *  <li>Taking a set of clues and making a "best guess" as to the
- *      "real" encoding.</li>
+ * <li>Auto detecting a set of "clues" from input text.</li>
+ * <li>Taking a set of clues and making a "best guess" as to the "real"
+ * encoding.</li>
  * </ol>
  * </p>
- *
+ * 
  * <p>
- * A caller will often have some extra information about what the
- * encoding might be (e.g. from the HTTP header or HTML meta-tags, often
- * wrong but still potentially useful clues). The types of clues may differ
- * from caller to caller. Thus a typical calling sequence is:
+ * A caller will often have some extra information about what the encoding 
might
+ * be (e.g. from the HTTP header or HTML meta-tags, often wrong but still
+ * potentially useful clues). The types of clues may differ from caller to
+ * caller. Thus a typical calling sequence is:
  * <ul>
- *    <li>Run step (1) to generate a set of auto-detected clues;</li>
- *    <li>Combine these clues with the caller-dependent "extra clues"
- *        available;</li>
- *    <li>Run step (2) to guess what the most probable answer is.</li>
+ * <li>Run step (1) to generate a set of auto-detected clues;</li>
+ * <li>Combine these clues with the caller-dependent "extra clues" 
available;</li>
+ * <li>Run step (2) to guess what the most probable answer is.</li>
  * </p>
  */
 public class EncodingDetector {
@@ -90,34 +89,32 @@ public class EncodingDetector {
 
     @Override
     public String toString() {
-      return value + " (" + source +
-           ((confidence >= 0) ? ", " + confidence + "% confidence" : "") + ")";
+      return value + " (" + source
+          + ((confidence >= 0) ? ", " + confidence + "% confidence" : "") + 
")";
     }
 
     public boolean isEmpty() {
-      return (value==null || "".equals(value));
+      return (value == null || "".equals(value));
     }
 
     public boolean meetsThreshold() {
-      return (confidence < 0 ||
-               (minConfidence >= 0 && confidence >= minConfidence));
+      return (confidence < 0 || (minConfidence >= 0 && confidence >= 
minConfidence));
     }
   }
 
-  public static final Logger LOG = 
LoggerFactory.getLogger(EncodingDetector.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(EncodingDetector.class);
 
   public static final int NO_THRESHOLD = -1;
 
-  public static final String MIN_CONFIDENCE_KEY =
-    "encodingdetector.charset.min.confidence";
+  public static final String MIN_CONFIDENCE_KEY = 
"encodingdetector.charset.min.confidence";
 
-  private static final HashMap<String, String> ALIASES =
-    new HashMap<String, String>();
+  private static final HashMap<String, String> ALIASES = new HashMap<String, 
String>();
 
   private static final HashSet<String> DETECTABLES = new HashSet<String>();
 
   // CharsetDetector will die without a minimum amount of data.
-  private static final int MIN_LENGTH=4;
+  private static final int MIN_LENGTH = 4;
 
   static {
     DETECTABLES.add("text/html");
@@ -130,23 +127,22 @@ public class EncodingDetector {
     DETECTABLES.add("application/rss+xml");
     DETECTABLES.add("application/xhtml+xml");
     /*
-     * the following map is not an alias mapping table, but
-     * maps character encodings which are often used in mislabelled
-     * documents to their correct encodings. For instance,
-     * there are a lot of documents labelled 'ISO-8859-1' which contain
-     * characters not covered by ISO-8859-1 but covered by windows-1252.
-     * Because windows-1252 is a superset of ISO-8859-1 (sharing code points
-     * for the common part), it's better to treat ISO-8859-1 as
-     * synonymous with windows-1252 than to reject, as invalid, documents
-     * labelled as ISO-8859-1 that have characters outside ISO-8859-1.
+     * the following map is not an alias mapping table, but maps character
+     * encodings which are often used in mislabelled documents to their correct
+     * encodings. For instance, there are a lot of documents labelled
+     * 'ISO-8859-1' which contain characters not covered by ISO-8859-1 but
+     * covered by windows-1252. Because windows-1252 is a superset of 
ISO-8859-1
+     * (sharing code points for the common part), it's better to treat
+     * ISO-8859-1 as synonymous with windows-1252 than to reject, as invalid,
+     * documents labelled as ISO-8859-1 that have characters outside 
ISO-8859-1.
      */
     ALIASES.put("ISO-8859-1", "windows-1252");
     ALIASES.put("EUC-KR", "x-windows-949");
     ALIASES.put("x-EUC-CN", "GB18030");
     ALIASES.put("GBK", "GB18030");
-    //ALIASES.put("Big5", "Big5HKSCS");
-    //ALIASES.put("TIS620", "Cp874");
-    //ALIASES.put("ISO-8859-11", "Cp874");
+    // ALIASES.put("Big5", "Big5HKSCS");
+    // ALIASES.put("TIS620", "Cp874");
+    // ALIASES.put("ISO-8859-11", "Cp874");
 
   }
 
@@ -164,16 +160,16 @@ public class EncodingDetector {
 
   public void autoDetectClues(WebPage page, boolean filter) {
     autoDetectClues(page.getContent(), page.getContentType(),
-        parseCharacterEncoding(page.getHeaders().get(CONTENT_TYPE_UTF8)), 
filter);
+        parseCharacterEncoding(page.getHeaders().get(CONTENT_TYPE_UTF8)),
+        filter);
   }
 
   private void autoDetectClues(ByteBuffer dataBuffer, CharSequence typeUtf8,
-                               String encoding, boolean filter) {
+      String encoding, boolean filter) {
     int length = dataBuffer.remaining();
     String type = TableUtil.toString(typeUtf8);
 
-    if (minConfidence >= 0 && DETECTABLES.contains(type)
-        && length > MIN_LENGTH) {
+    if (minConfidence >= 0 && DETECTABLES.contains(type) && length > 
MIN_LENGTH) {
       CharsetMatch[] matches = null;
 
       // do all these in a try/catch; setText and detect/detectAll
@@ -214,12 +210,14 @@ public class EncodingDetector {
 
   /**
    * Guess the encoding with the previously specified list of clues.
-   *
-   * @param row URL's row
-   * @param defaultValue Default encoding to return if no encoding can be
-   * detected with enough confidence. Note that this will <b>not</b> be
-   * normalized with {@link EncodingDetector#resolveEncodingAlias}
-   *
+   * 
+   * @param row
+   *          URL's row
+   * @param defaultValue
+   *          Default encoding to return if no encoding can be detected with
+   *          enough confidence. Note that this will <b>not</b> be normalized
+   *          with {@link EncodingDetector#resolveEncodingAlias}
+   * 
    * @return Guessed encoding or defaultValue
    */
   public String guessEncoding(WebPage page, String defaultValue) {
@@ -230,33 +228,33 @@ public class EncodingDetector {
 
   /**
    * Guess the encoding with the previously specified list of clues.
-   *
-   * @param baseUrl Base URL
-   * @param defaultValue Default encoding to return if no encoding can be
-   * detected with enough confidence. Note that this will <b>not</b> be
-   * normalized with {@link EncodingDetector#resolveEncodingAlias}
-   *
+   * 
+   * @param baseUrl
+   *          Base URL
+   * @param defaultValue
+   *          Default encoding to return if no encoding can be detected with
+   *          enough confidence. Note that this will <b>not</b> be normalized
+   *          with {@link EncodingDetector#resolveEncodingAlias}
+   * 
    * @return Guessed encoding or defaultValue
    */
   private String guessEncoding(String baseUrl, String defaultValue) {
     /*
-     * This algorithm could be replaced by something more sophisticated;
-     * ideally we would gather a bunch of data on where various clues
-     * (autodetect, HTTP headers, HTML meta tags, etc.) disagree, tag each with
-     * the correct answer, and use machine learning/some statistical method
-     * to generate a better heuristic.
+     * This algorithm could be replaced by something more sophisticated; 
ideally
+     * we would gather a bunch of data on where various clues (autodetect, HTTP
+     * headers, HTML meta tags, etc.) disagree, tag each with the correct
+     * answer, and use machine learning/some statistical method to generate a
+     * better heuristic.
      */
 
-
     if (LOG.isTraceEnabled()) {
       findDisagreements(baseUrl, clues);
     }
 
     /*
-     * Go down the list of encoding "clues". Use a clue if:
-     *  1. Has a confidence value which meets our confidence threshold, OR
-     *  2. Doesn't meet the threshold, but is the best try,
-     *     since nothing else is available.
+     * Go down the list of encoding "clues". Use a clue if: 1. Has a confidence
+     * value which meets our confidence threshold, OR 2. Doesn't meet the
+     * threshold, but is the best try, since nothing else is available.
      */
     EncodingClue defaultClue = new EncodingClue(defaultValue, "default");
     EncodingClue bestClue = defaultClue;
@@ -268,8 +266,8 @@ public class EncodingDetector {
       String charset = clue.value;
       if (minConfidence >= 0 && clue.confidence >= minConfidence) {
         if (LOG.isTraceEnabled()) {
-          LOG.trace(baseUrl + ": Choosing encoding: " + charset +
-                    " with confidence " + clue.confidence);
+          LOG.trace(baseUrl + ": Choosing encoding: " + charset
+              + " with confidence " + clue.confidence);
         }
         return resolveEncodingAlias(charset).toLowerCase();
       } else if (clue.confidence == NO_THRESHOLD && bestClue == defaultClue) {
@@ -289,10 +287,10 @@ public class EncodingDetector {
   }
 
   /*
-   * Strictly for analysis, look for "disagreements." The top guess from
-   * each source is examined; if these meet the threshold and disagree, then
-   * we log the information -- useful for testing or generating training data
-   * for a better heuristic.
+   * Strictly for analysis, look for "disagreements." The top guess from each
+   * source is examined; if these meet the threshold and disagree, then we log
+   * the information -- useful for testing or generating training data for a
+   * better heuristic.
    */
   private void findDisagreements(String url, List<EncodingClue> newClues) {
     HashSet<String> valsSeen = new HashSet<String>();
@@ -314,9 +312,9 @@ public class EncodingDetector {
     if (disagreement) {
       // dump all values in case of disagreement
       StringBuffer sb = new StringBuffer();
-      sb.append("Disagreement: "+url+"; ");
+      sb.append("Disagreement: " + url + "; ");
       for (int i = 0; i < newClues.size(); i++) {
-        if (i>0) {
+        if (i > 0) {
           sb.append(", ");
         }
         sb.append(newClues.get(i));
@@ -331,7 +329,7 @@ public class EncodingDetector {
         return null;
       String canonicalName = new String(Charset.forName(encoding).name());
       return ALIASES.containsKey(canonicalName) ? ALIASES.get(canonicalName)
-                                                : canonicalName;
+          : canonicalName;
     } catch (Exception e) {
       LOG.warn("Invalid encoding " + encoding + " detected, using default.");
       return null;
@@ -339,13 +337,12 @@ public class EncodingDetector {
   }
 
   /**
-   * Parse the character encoding from the specified content type header.
-   * If the content type is null, or there is no explicit character encoding,
-   * <code>null</code> is returned.
-   * <br />
-   * This method was copied from org.apache.catalina.util.RequestUtil,
-   * which is licensed under the Apache License, Version 2.0 (the "License").
-   *
+   * Parse the character encoding from the specified content type header. If 
the
+   * content type is null, or there is no explicit character encoding,
+   * <code>null</code> is returned. <br />
+   * This method was copied from org.apache.catalina.util.RequestUtil, which is
+   * licensed under the Apache License, Version 2.0 (the "License").
+   * 
    * @param contentTypeUtf8
    */
   public static String parseCharacterEncoding(CharSequence contentTypeUtf8) {
@@ -361,51 +358,36 @@ public class EncodingDetector {
       encoding = encoding.substring(0, end);
     encoding = encoding.trim();
     if ((encoding.length() > 2) && (encoding.startsWith("\""))
-      && (encoding.endsWith("\"")))
+        && (encoding.endsWith("\"")))
       encoding = encoding.substring(1, encoding.length() - 1);
     return (encoding.trim());
 
   }
 
-  /*public static void main(String[] args) throws IOException {
-    if (args.length != 1) {
-      System.err.println("Usage: EncodingDetector <file>");
-      System.exit(1);
-    }
-
-    Configuration conf = NutchConfiguration.create();
-    EncodingDetector detector =
-      new EncodingDetector(NutchConfiguration.create());
-
-    // do everything as bytes; don't want any conversion
-    BufferedInputStream istr =
-      new BufferedInputStream(new FileInputStream(args[0]));
-    ByteArrayOutputStream ostr = new ByteArrayOutputStream();
-    byte[] bytes = new byte[1000];
-    boolean more = true;
-    while (more) {
-      int len = istr.read(bytes);
-      if (len < bytes.length) {
-        more = false;
-        if (len > 0) {
-          ostr.write(bytes, 0, len);
-        }
-      } else {
-        ostr.write(bytes);
-      }
-    }
-
-    byte[] data = ostr.toByteArray();
-    MimeUtil mimeTypes = new MimeUtil(conf);
-
-    // make a fake Content
-    Content content =
-      new Content("", "", data, "text/html", new Metadata(), mimeTypes);
-
-    detector.autoDetectClues(content, true);
-    String encoding = detector.guessEncoding(content,
-        conf.get("parser.character.encoding.default"));
-    System.out.println("Guessed encoding: " + encoding);
-  }*/
+  /*
+   * public static void main(String[] args) throws IOException { if 
(args.length
+   * != 1) { System.err.println("Usage: EncodingDetector <file>");
+   * System.exit(1); }
+   * 
+   * Configuration conf = NutchConfiguration.create(); EncodingDetector 
detector
+   * = new EncodingDetector(NutchConfiguration.create());
+   * 
+   * // do everything as bytes; don't want any conversion BufferedInputStream
+   * istr = new BufferedInputStream(new FileInputStream(args[0]));
+   * ByteArrayOutputStream ostr = new ByteArrayOutputStream(); byte[] bytes =
+   * new byte[1000]; boolean more = true; while (more) { int len =
+   * istr.read(bytes); if (len < bytes.length) { more = false; if (len > 0) {
+   * ostr.write(bytes, 0, len); } } else { ostr.write(bytes); } }
+   * 
+   * byte[] data = ostr.toByteArray(); MimeUtil mimeTypes = new MimeUtil(conf);
+   * 
+   * // make a fake Content Content content = new Content("", "", data,
+   * "text/html", new Metadata(), mimeTypes);
+   * 
+   * detector.autoDetectClues(content, true); String encoding =
+   * detector.guessEncoding(content,
+   * conf.get("parser.character.encoding.default"));
+   * System.out.println("Guessed encoding: " + encoding); }
+   */
 
 }


Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/FSUtils.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/FSUtils.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/FSUtils.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/FSUtils.java Fri Jan  9 
06:34:33 2015
@@ -33,16 +33,20 @@ public class FSUtils {
    * path. If removeOld is set to false then the old path will be set to the
    * name current.old.
    * 
-   * @param fs The FileSystem.
-   * @param current The end path, the one being replaced.
-   * @param replacement The path to replace with.
-   * @param removeOld True if we are removing the current path.
+   * @param fs
+   *          The FileSystem.
+   * @param current
+   *          The end path, the one being replaced.
+   * @param replacement
+   *          The path to replace with.
+   * @param removeOld
+   *          True if we are removing the current path.
    * 
-   * @throws IOException If an error occurs during replacement.
+   * @throws IOException
+   *           If an error occurs during replacement.
    */
   public static void replace(FileSystem fs, Path current, Path replacement,
-    boolean removeOld)
-    throws IOException {
+      boolean removeOld) throws IOException {
 
     // rename any current path to old
     Path old = new Path(current + ".old");
@@ -60,12 +64,14 @@ public class FSUtils {
   /**
    * Closes a group of SequenceFile readers.
    * 
-   * @param readers The SequenceFile readers to close.
-   * @throws IOException If an error occurs while closing a reader.
+   * @param readers
+   *          The SequenceFile readers to close.
+   * @throws IOException
+   *           If an error occurs while closing a reader.
    */
   public static void closeReaders(SequenceFile.Reader[] readers)
-    throws IOException {
-    
+      throws IOException {
+
     // loop through the readers, closing one by one
     if (readers != null) {
       for (int i = 0; i < readers.length; i++) {
@@ -80,12 +86,13 @@ public class FSUtils {
   /**
    * Closes a group of MapFile readers.
    * 
-   * @param readers The MapFile readers to close.
-   * @throws IOException If an error occurs while closing a reader.
+   * @param readers
+   *          The MapFile readers to close.
+   * @throws IOException
+   *           If an error occurs while closing a reader.
    */
-  public static void closeReaders(MapFile.Reader[] readers)
-    throws IOException {
-    
+  public static void closeReaders(MapFile.Reader[] readers) throws IOException 
{
+
     // loop through the readers closing one by one
     if (readers != null) {
       for (int i = 0; i < readers.length; i++) {

Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/GZIPUtils.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/GZIPUtils.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/GZIPUtils.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/GZIPUtils.java Fri Jan  9 
06:34:33 2015
@@ -28,19 +28,18 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /**
- *  A collection of utility methods for working on GZIPed data.
+ * A collection of utility methods for working on GZIPed data.
  */
 public class GZIPUtils {
-  
+
   private static final Logger LOG = LoggerFactory.getLogger(GZIPUtils.class);
-  private static final int EXPECTED_COMPRESSION_RATIO= 5;
-  private static final int BUF_SIZE= 4096;
+  private static final int EXPECTED_COMPRESSION_RATIO = 5;
+  private static final int BUF_SIZE = 4096;
 
   /**
-   * Returns an gunzipped copy of the input array.  If the gzipped
-   * input has been truncated or corrupted, a best-effort attempt is
-   * made to unzip as much as possible.  If no data can be extracted
-   * <code>null</code> is returned.
+   * Returns an gunzipped copy of the input array. If the gzipped input has 
been
+   * truncated or corrupted, a best-effort attempt is made to unzip as much as
+   * possible. If no data can be extracted <code>null</code> is returned.
    */
   public static final byte[] unzipBestEffort(byte[] in) {
     return unzipBestEffort(in, Integer.MAX_VALUE);
@@ -48,33 +47,32 @@ public class GZIPUtils {
 
   /**
    * Returns an gunzipped copy of the input array, truncated to
-   * <code>sizeLimit</code> bytes, if necessary.  If the gzipped input
-   * has been truncated or corrupted, a best-effort attempt is made to
-   * unzip as much as possible.  If no data can be extracted
-   * <code>null</code> is returned.
+   * <code>sizeLimit</code> bytes, if necessary. If the gzipped input has been
+   * truncated or corrupted, a best-effort attempt is made to unzip as much as
+   * possible. If no data can be extracted <code>null</code> is returned.
    */
   public static final byte[] unzipBestEffort(byte[] in, int sizeLimit) {
     try {
-      // decompress using GZIPInputStream 
-      ByteArrayOutputStream outStream = 
-        new ByteArrayOutputStream(EXPECTED_COMPRESSION_RATIO * in.length);
+      // decompress using GZIPInputStream
+      ByteArrayOutputStream outStream = new ByteArrayOutputStream(
+          EXPECTED_COMPRESSION_RATIO * in.length);
 
-      GZIPInputStream inStream = 
-        new GZIPInputStream ( new ByteArrayInputStream(in) );
+      GZIPInputStream inStream = new GZIPInputStream(new ByteArrayInputStream(
+          in));
 
       byte[] buf = new byte[BUF_SIZE];
       int written = 0;
       while (true) {
         try {
           int size = inStream.read(buf);
-          if (size <= 0) 
+          if (size <= 0)
             break;
           if ((written + size) > sizeLimit) {
             outStream.write(buf, 0, sizeLimit - written);
             break;
           }
           outStream.write(buf, 0, size);
-          written+= size;
+          written += size;
         } catch (Exception e) {
           break;
         }
@@ -91,23 +89,23 @@ public class GZIPUtils {
     }
   }
 
-
   /**
-   * Returns an gunzipped copy of the input array.  
-   * @throws IOException if the input cannot be properly decompressed
+   * Returns an gunzipped copy of the input array.
+   * 
+   * @throws IOException
+   *           if the input cannot be properly decompressed
    */
   public static final byte[] unzip(byte[] in) throws IOException {
-    // decompress using GZIPInputStream 
-    ByteArrayOutputStream outStream = 
-      new ByteArrayOutputStream(EXPECTED_COMPRESSION_RATIO * in.length);
+    // decompress using GZIPInputStream
+    ByteArrayOutputStream outStream = new ByteArrayOutputStream(
+        EXPECTED_COMPRESSION_RATIO * in.length);
 
-    GZIPInputStream inStream = 
-      new GZIPInputStream ( new ByteArrayInputStream(in) );
+    GZIPInputStream inStream = new GZIPInputStream(new 
ByteArrayInputStream(in));
 
     byte[] buf = new byte[BUF_SIZE];
     while (true) {
       int size = inStream.read(buf);
-      if (size <= 0) 
+      if (size <= 0)
         break;
       outStream.write(buf, 0, size);
     }
@@ -121,11 +119,11 @@ public class GZIPUtils {
    */
   public static final byte[] zip(byte[] in) {
     try {
-      // compress using GZIPOutputStream 
-      ByteArrayOutputStream byteOut= 
-        new ByteArrayOutputStream(in.length / EXPECTED_COMPRESSION_RATIO);
+      // compress using GZIPOutputStream
+      ByteArrayOutputStream byteOut = new ByteArrayOutputStream(in.length
+          / EXPECTED_COMPRESSION_RATIO);
 
-      GZIPOutputStream outStream= new GZIPOutputStream(byteOut);
+      GZIPOutputStream outStream = new GZIPOutputStream(byteOut);
 
       try {
         outStream.write(in);
@@ -142,9 +140,9 @@ public class GZIPUtils {
       return byteOut.toByteArray();
 
     } catch (IOException e) {
-        LOG.error("Failed with IOException", e);
+      LOG.error("Failed with IOException", e);
       return null;
     }
   }
-    
+
 }

Modified: 
nutch/branches/2.x/src/java/org/apache/nutch/util/GenericWritableConfigurable.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/GenericWritableConfigurable.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- 
nutch/branches/2.x/src/java/org/apache/nutch/util/GenericWritableConfigurable.java
 (original)
+++ 
nutch/branches/2.x/src/java/org/apache/nutch/util/GenericWritableConfigurable.java
 Fri Jan  9 06:34:33 2015
@@ -24,12 +24,15 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.io.GenericWritable;
 import org.apache.hadoop.io.Writable;
 
-/** A generic Writable wrapper that can inject Configuration to {@link 
Configurable}s */ 
-public abstract class GenericWritableConfigurable extends GenericWritable 
-                                                  implements Configurable {
+/**
+ * A generic Writable wrapper that can inject Configuration to
+ * {@link Configurable}s
+ */
+public abstract class GenericWritableConfigurable extends GenericWritable
+    implements Configurable {
 
   private Configuration conf;
-  
+
   public Configuration getConf() {
     return conf;
   }
@@ -37,7 +40,7 @@ public abstract class GenericWritableCon
   public void setConf(Configuration conf) {
     this.conf = conf;
   }
-  
+
   @Override
   public void readFields(DataInput in) throws IOException {
     byte type = in.readByte();
@@ -50,8 +53,8 @@ public abstract class GenericWritableCon
     }
     Writable w = get();
     if (w instanceof Configurable)
-      ((Configurable)w).setConf(conf);
+      ((Configurable) w).setConf(conf);
     w.readFields(in);
   }
-  
+
 }

Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/HadoopFSUtil.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/HadoopFSUtil.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/HadoopFSUtil.java 
(original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/HadoopFSUtil.java Fri Jan 
 9 06:34:33 2015
@@ -25,48 +25,48 @@ import org.apache.hadoop.fs.PathFilter;
 
 public class HadoopFSUtil {
 
-    /**
-     * Returns PathFilter that passes all paths through.
-     */
-    public static PathFilter getPassAllFilter() {
-        return new PathFilter() {
-            public boolean accept(Path arg0) {
-                return true;
-            }
-        };
-    }
+  /**
+   * Returns PathFilter that passes all paths through.
+   */
+  public static PathFilter getPassAllFilter() {
+    return new PathFilter() {
+      public boolean accept(Path arg0) {
+        return true;
+      }
+    };
+  }
+
+  /**
+   * Returns PathFilter that passes directories through.
+   */
+  public static PathFilter getPassDirectoriesFilter(final FileSystem fs) {
+    return new PathFilter() {
+      public boolean accept(final Path path) {
+        try {
+          return fs.getFileStatus(path).isDir();
+        } catch (IOException ioe) {
+          return false;
+        }
+      }
 
-    /**
-     * Returns PathFilter that passes directories through.
-     */
-    public static PathFilter getPassDirectoriesFilter(final FileSystem fs) {
-        return new PathFilter() {
-            public boolean accept(final Path path) {
-                try {
-                    return fs.getFileStatus(path).isDir();
-                } catch (IOException ioe) {
-                    return false;
-                }
-            }
+    };
+  }
 
-        };
+  /**
+   * Turns an array of FileStatus into an array of Paths.
+   */
+  public static Path[] getPaths(FileStatus[] stats) {
+    if (stats == null) {
+      return null;
     }
-    
-    /**
-     * Turns an array of FileStatus into an array of Paths.
-     */
-    public static Path[] getPaths(FileStatus[] stats) {
-      if (stats == null) {
-        return null;
-      }
-      if (stats.length == 0) {
-        return new Path[0];
-      }
-      Path[] res = new Path[stats.length];
-      for (int i = 0; i < stats.length; i++) {
-        res[i] = stats[i].getPath();
-      }
-      return res;
+    if (stats.length == 0) {
+      return new Path[0];
+    }
+    Path[] res = new Path[stats.length];
+    for (int i = 0; i < stats.length; i++) {
+      res[i] = stats[i].getPath();
     }
+    return res;
+  }
 
 }

Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/Histogram.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/Histogram.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/Histogram.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/Histogram.java Fri Jan  9 
06:34:33 2015
@@ -72,8 +72,8 @@ public class Histogram<E> {
   }
 
   public List<E> sortInverseByValue() {
-    List<Map.Entry<E, HistogramEntry>> list = 
-        new Vector<Map.Entry<E, HistogramEntry>>(map.entrySet());
+    List<Map.Entry<E, HistogramEntry>> list = new Vector<Map.Entry<E, 
HistogramEntry>>(
+        map.entrySet());
 
     // Sort the list using an annonymous inner class implementing Comparator 
for
     // the compare method
@@ -93,8 +93,8 @@ public class Histogram<E> {
   }
 
   public List<E> sortByValue() {
-    List<Map.Entry<E, HistogramEntry>> list = 
-        new Vector<Map.Entry<E, HistogramEntry>>(map.entrySet());
+    List<Map.Entry<E, HistogramEntry>> list = new Vector<Map.Entry<E, 
HistogramEntry>>(
+        map.entrySet());
 
     // Sort the list using an annonymous inner class implementing Comparator 
for
     // the compare method

Modified: 
nutch/branches/2.x/src/java/org/apache/nutch/util/IdentityPageReducer.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/IdentityPageReducer.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/IdentityPageReducer.java 
(original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/IdentityPageReducer.java 
Fri Jan  9 06:34:33 2015
@@ -21,12 +21,12 @@ import java.io.IOException;
 import org.apache.nutch.storage.WebPage;
 import org.apache.gora.mapreduce.GoraReducer;
 
-public class IdentityPageReducer
-extends GoraReducer<String, WebPage, String, WebPage> {
+public class IdentityPageReducer extends
+    GoraReducer<String, WebPage, String, WebPage> {
 
   @Override
-  protected void reduce(String key, Iterable<WebPage> values,
-      Context context) throws IOException, InterruptedException {
+  protected void reduce(String key, Iterable<WebPage> values, Context context)
+      throws IOException, InterruptedException {
     for (WebPage page : values) {
       context.write(key, page);
     }

Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/LockUtil.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/LockUtil.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/LockUtil.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/LockUtil.java Fri Jan  9 
06:34:33 2015
@@ -28,22 +28,29 @@ import org.apache.hadoop.fs.Path;
  * @author Andrzej Bialecki
  */
 public class LockUtil {
-  
+
   /**
    * Create a lock file.
-   * @param fs filesystem
-   * @param lockFile name of the lock file
-   * @param accept if true, and the target file exists, consider it valid. If 
false
-   * and the target file exists, throw an IOException.
-   * @throws IOException if accept is false, and the target file already 
exists,
-   * or if it's a directory.
+   * 
+   * @param fs
+   *          filesystem
+   * @param lockFile
+   *          name of the lock file
+   * @param accept
+   *          if true, and the target file exists, consider it valid. If false
+   *          and the target file exists, throw an IOException.
+   * @throws IOException
+   *           if accept is false, and the target file already exists, or if
+   *           it's a directory.
    */
-  public static void createLockFile(FileSystem fs, Path lockFile, boolean 
accept) throws IOException {
+  public static void createLockFile(FileSystem fs, Path lockFile, boolean 
accept)
+      throws IOException {
     if (fs.exists(lockFile)) {
-      if(!accept)
+      if (!accept)
         throw new IOException("lock file " + lockFile + " already exists.");
       if (fs.getFileStatus(lockFile).isDir())
-        throw new IOException("lock file " + lockFile + " already exists and 
is a directory.");
+        throw new IOException("lock file " + lockFile
+            + " already exists and is a directory.");
       // do nothing - the file already exists.
     } else {
       // make sure parents exist
@@ -55,16 +62,23 @@ public class LockUtil {
   /**
    * Remove lock file. NOTE: applications enforce the semantics of this file -
    * this method simply removes any file with a given name.
-   * @param fs filesystem
-   * @param lockFile lock file name
+   * 
+   * @param fs
+   *          filesystem
+   * @param lockFile
+   *          lock file name
    * @return false, if the lock file doesn't exist. True, if it existed and was
-   * successfully removed.
-   * @throws IOException if lock file exists but it is a directory.
+   *         successfully removed.
+   * @throws IOException
+   *           if lock file exists but it is a directory.
    */
-  public static boolean removeLockFile(FileSystem fs, Path lockFile) throws 
IOException {
-    if (!fs.exists(lockFile)) return false;
+  public static boolean removeLockFile(FileSystem fs, Path lockFile)
+      throws IOException {
+    if (!fs.exists(lockFile))
+      return false;
     if (fs.getFileStatus(lockFile).isDir())
-      throw new IOException("lock file " + lockFile + " exists but is a 
directory!");
+      throw new IOException("lock file " + lockFile
+          + " exists but is a directory!");
     return fs.delete(lockFile, false);
   }
 }

Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/MimeUtil.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/MimeUtil.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/MimeUtil.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/MimeUtil.java Fri Jan  9 
06:34:33 2015
@@ -37,7 +37,7 @@ import org.apache.tika.mime.MimeTypesFac
 // Slf4j logging imports
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
- 
+
 // imported for Javadoc
 import org.apache.nutch.protocol.ProtocolOutput;
 
@@ -45,12 +45,12 @@ import org.apache.nutch.protocol.Protoco
  * @author mattmann
  * @since NUTCH-608
  * 
- * <p>
- * This is a facade class to insulate Nutch from its underlying Mime Type
- * substrate library, <a href="http://incubator.apache.org/tika/";>Apache 
Tika</a>.
- * Any mime handling code should be placed in this utility class, and hidden
- * from the Nutch classes that rely on it.
- * </p>
+ *        <p>
+ *        This is a facade class to insulate Nutch from its underlying Mime 
Type
+ *        substrate library, <a href="http://incubator.apache.org/tika/";>Apache
+ *        Tika</a>. Any mime handling code should be placed in this utility
+ *        class, and hidden from the Nutch classes that rely on it.
+ *        </p>
  */
 public final class MimeUtil {
 
@@ -66,7 +66,8 @@ public final class MimeUtil {
   private boolean mimeMagic;
 
   /* our log stream */
-  private static final Logger LOG = 
LoggerFactory.getLogger(MimeUtil.class.getName());
+  private static final Logger LOG = LoggerFactory.getLogger(MimeUtil.class
+      .getName());
 
   public MimeUtil(Configuration conf) {
     tika = new Tika();
@@ -75,25 +76,26 @@ public final class MimeUtil {
         .getName());
     if (mimeTypez == null) {
       try {
-          String customMimeTypeFile = conf.get("mime.types.file");
-          if (customMimeTypeFile!=null && 
customMimeTypeFile.equals("")==false){
-              try {
-              mimeTypez = MimeTypesFactory.create(conf
-                      .getConfResourceAsInputStream(customMimeTypeFile));
-              }
-              catch (Exception e){
-                  LOG.error("Can't load mime.types.file : 
"+customMimeTypeFile+" using Tika's default");
-              }
+        String customMimeTypeFile = conf.get("mime.types.file");
+        if (customMimeTypeFile != null
+            && customMimeTypeFile.equals("") == false) {
+          try {
+            mimeTypez = MimeTypesFactory.create(conf
+                .getConfResourceAsInputStream(customMimeTypeFile));
+          } catch (Exception e) {
+            LOG.error("Can't load mime.types.file : " + customMimeTypeFile
+                + " using Tika's default");
           }
-          if (mimeTypez==null)
-              mimeTypez = MimeTypes.getDefaultMimeTypes();
+        }
+        if (mimeTypez == null)
+          mimeTypez = MimeTypes.getDefaultMimeTypes();
       } catch (Exception e) {
-        LOG.error("Exception in MimeUtil "+e.getMessage());
+        LOG.error("Exception in MimeUtil " + e.getMessage());
         throw new RuntimeException(e);
       }
       objectCache.setObject(MimeTypes.class.getName(), mimeTypez);
     }
-    
+
     this.mimeTypes = mimeTypez;
     this.mimeMagic = conf.getBoolean("mime.type.magic", true);
   }
@@ -129,14 +131,13 @@ public final class MimeUtil {
   /**
    * A facade interface to trying all the possible mime type resolution
    * strategies available within Tika. First, the mime type provided in
-   * <code>typeName</code> is cleaned, with {@link #cleanMimeType(String)}.
-   * Then the cleaned mime type is looked up in the underlying Tika
-   * {@link MimeTypes} registry, by its cleaned name. If the {@link MimeType}
-   * is found, then that mime type is used, otherwise URL resolution is
-   * used to try and determine the mime type. However, if
-   * <code>mime.type.magic</code> is enabled in {@link NutchConfiguration},
-   * then mime type magic resolution is used to try and obtain a
-   * better-than-the-default approximation of the {@link MimeType}.
+   * <code>typeName</code> is cleaned, with {@link #cleanMimeType(String)}. 
Then
+   * the cleaned mime type is looked up in the underlying Tika {@link 
MimeTypes}
+   * registry, by its cleaned name. If the {@link MimeType} is found, then that
+   * mime type is used, otherwise URL resolution is used to try and determine
+   * the mime type. However, if <code>mime.type.magic</code> is enabled in
+   * {@link NutchConfiguration}, then mime type magic resolution is used to try
+   * and obtain a better-than-the-default approximation of the {@link 
MimeType}.
    * 
    * @param typeName
    *          The original mime type, returned from a {@link ProtocolOutput}.
@@ -177,7 +178,7 @@ public final class MimeUtil {
         throw new RuntimeException(e);
       }
     } else {
-        retType = type.getName();
+      retType = type.getName();
     }
 
     // if magic is enabled use mime magic to guess if the mime type returned
@@ -195,14 +196,15 @@ public final class MimeUtil {
         InputStream stream = TikaInputStream.get(data);
         try {
           magicType = tika.detect(stream, tikaMeta);
-       } finally {
-         stream.close();
+        } finally {
+          stream.close();
         }
-      } catch (IOException ignore) {}
+      } catch (IOException ignore) {
+      }
 
       if (magicType != null && !magicType.equals(MimeTypes.OCTET_STREAM)
-          && !magicType.equals(MimeTypes.PLAIN_TEXT)
-          && retType != null && !retType.equals(magicType)) {
+          && !magicType.equals(MimeTypes.PLAIN_TEXT) && retType != null
+          && !retType.equals(magicType)) {
 
         // If magic enabled and the current mime type differs from that of the
         // one returned from the magic, take the magic mimeType
@@ -225,12 +227,12 @@ public final class MimeUtil {
   /**
    * Facade interface to Tika's underlying {@link 
MimeTypes#getMimeType(String)}
    * method.
-   *
+   * 
    * @param url
    *          A string representation of the document {@link URL} to sense the
    *          {@link MimeType} for.
-   * @return An appropriate {@link MimeType}, identified from the given
-   *         Document url in string form.
+   * @return An appropriate {@link MimeType}, identified from the given 
Document
+   *         url in string form.
    */
   public String getMimeType(String url) {
     return tika.detect(url);
@@ -239,11 +241,11 @@ public final class MimeUtil {
   /**
    * A facade interface to Tika's underlying {@link MimeTypes#forName(String)}
    * method.
-   *
+   * 
    * @param name
    *          The name of a valid {@link MimeType} in the Tika mime registry.
-   * @return The object representation of the {@link MimeType}, if it exists,
-   *         or null otherwise.
+   * @return The object representation of the {@link MimeType}, if it exists, 
or
+   *         null otherwise.
    */
   public String forName(String name) {
     try {
@@ -258,7 +260,7 @@ public final class MimeUtil {
   /**
    * Facade interface to Tika's underlying {@link MimeTypes#getMimeType(File)}
    * method.
-   *
+   * 
    * @param f
    *          The {@link File} to sense the {@link MimeType} for.
    * @return The {@link MimeType} of the given {@link File}, or null if it

Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/NodeWalker.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/NodeWalker.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/NodeWalker.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/NodeWalker.java Fri Jan  
9 06:34:33 2015
@@ -22,13 +22,17 @@ import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
 
 /**
- * <p>A utility class that allows the walking of any DOM tree using a stack 
- * instead of recursion.  As the node tree is walked the next node is popped
- * off of the stack and all of its children are automatically added to the 
- * stack to be called in tree order.</p>
+ * <p>
+ * A utility class that allows the walking of any DOM tree using a stack 
instead
+ * of recursion. As the node tree is walked the next node is popped off of the
+ * stack and all of its children are automatically added to the stack to be
+ * called in tree order.
+ * </p>
  * 
- * <p>Currently this class is not thread safe.  It is assumed that only one
- * thread will be accessing the <code>NodeWalker</code> at any given time.</p>
+ * <p>
+ * Currently this class is not thread safe. It is assumed that only one thread
+ * will be accessing the <code>NodeWalker</code> at any given time.
+ * </p>
  */
 public class NodeWalker {
 
@@ -36,7 +40,7 @@ public class NodeWalker {
   private Node currentNode;
   private NodeList currentChildren;
   private Stack<Node> nodes;
-  
+
   /**
    * Starts the <code>Node</code> tree from the root node.
    * 
@@ -47,69 +51,77 @@ public class NodeWalker {
     nodes = new Stack<Node>();
     nodes.add(rootNode);
   }
-  
+
   /**
-   * <p>Returns the next <code>Node</code> on the stack and pushes all of its
-   * children onto the stack, allowing us to walk the node tree without the
-   * use of recursion.  If there are no more nodes on the stack then null is
-   * returned.</p>
+   * <p>
+   * Returns the next <code>Node</code> on the stack and pushes all of its
+   * children onto the stack, allowing us to walk the node tree without the use
+   * of recursion. If there are no more nodes on the stack then null is
+   * returned.
+   * </p>
    * 
-   * @return Node The next <code>Node</code> on the stack or null if there
-   * isn't a next node.
+   * @return Node The next <code>Node</code> on the stack or null if there 
isn't
+   *         a next node.
    */
   public Node nextNode() {
-    
+
     // if no next node return null
     if (!hasNext()) {
       return null;
     }
-    
+
     // pop the next node off of the stack and push all of its children onto
     // the stack
     currentNode = nodes.pop();
     currentChildren = currentNode.getChildNodes();
     int childLen = (currentChildren != null) ? currentChildren.getLength() : 0;
-    
+
     // put the children node on the stack in first to last order
     for (int i = childLen - 1; i >= 0; i--) {
       nodes.add(currentChildren.item(i));
     }
-    
+
     return currentNode;
   }
-  
+
   /**
-   * <p>Skips over and removes from the node stack the children of the last
-   * node.  When getting a next node from the walker, that node's children 
-   * are automatically added to the stack.  You can call this method to remove
-   * those children from the stack.</p>
-   * 
-   * <p>This is useful when you don't want to process deeper into the 
-   * current path of the node tree but you want to continue processing sibling
-   * nodes.</p>
-   *
+   * <p>
+   * Skips over and removes from the node stack the children of the last node.
+   * When getting a next node from the walker, that node's children are
+   * automatically added to the stack. You can call this method to remove those
+   * children from the stack.
+   * </p>
+   * 
+   * <p>
+   * This is useful when you don't want to process deeper into the current path
+   * of the node tree but you want to continue processing sibling nodes.
+   * </p>
+   * 
    */
   public void skipChildren() {
-    
+
     int childLen = (currentChildren != null) ? currentChildren.getLength() : 0;
-    
-    for (int i = 0 ; i < childLen ; i++) {
+
+    for (int i = 0; i < childLen; i++) {
       Node child = nodes.peek();
       if (child.equals(currentChildren.item(i))) {
         nodes.pop();
       }
     }
   }
-  
+
   /**
    * Return the current node.
+   * 
    * @return Node
    */
   public Node getCurrentNode() {
     return currentNode;
   }
-  
-  /**   * Returns true if there are more nodes on the current stack.
+
+  /**
+   * * Returns true if there are more nodes on the current stack.
+   * 
    * @return
    */
   public boolean hasNext() {

Modified: 
nutch/branches/2.x/src/java/org/apache/nutch/util/NutchConfiguration.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/NutchConfiguration.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/NutchConfiguration.java 
(original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/NutchConfiguration.java 
Fri Jan  9 06:34:33 2015
@@ -23,37 +23,42 @@ import java.util.UUID;
 
 import org.apache.hadoop.conf.Configuration;
 
-
-/** Utility to create Hadoop {@link Configuration}s that include Nutch-specific
- * resources.  */
+/**
+ * Utility to create Hadoop {@link Configuration}s that include Nutch-specific
+ * resources.
+ */
 public class NutchConfiguration {
   public static final String UUID_KEY = "nutch.conf.uuid";
-  
-  private NutchConfiguration() {}                 // singleton
-  
+
+  private NutchConfiguration() {
+  } // singleton
+
   /*
-   * Configuration.hashCode() doesn't return values that
-   * correspond to a unique set of parameters. This is a workaround
-   * so that we can track instances of Configuration created by Nutch.
+   * Configuration.hashCode() doesn't return values that correspond to a unique
+   * set of parameters. This is a workaround so that we can track instances of
+   * Configuration created by Nutch.
    */
   private static void setUUID(Configuration conf) {
     UUID uuid = UUID.randomUUID();
     conf.set(UUID_KEY, uuid.toString());
   }
-  
+
   /**
-   * Retrieve a Nutch UUID of this configuration object, or null
-   * if the configuration was created elsewhere.
-   * @param conf configuration instance
+   * Retrieve a Nutch UUID of this configuration object, or null if the
+   * configuration was created elsewhere.
+   * 
+   * @param conf
+   *          configuration instance
    * @return uuid or null
    */
   public static String getUUID(Configuration conf) {
     return conf.get(UUID_KEY);
   }
 
-  /** Create a {@link Configuration} for Nutch. This will load the standard
-   * Nutch resources, <code>nutch-default.xml</code> and
-   * <code>nutch-site.xml</code> overrides.
+  /**
+   * Create a {@link Configuration} for Nutch. This will load the standard 
Nutch
+   * resources, <code>nutch-default.xml</code> and <code>nutch-site.xml</code>
+   * overrides.
    */
   public static Configuration create() {
     Configuration conf = new Configuration();
@@ -61,14 +66,19 @@ public class NutchConfiguration {
     addNutchResources(conf);
     return conf;
   }
-  
-  /** Create a {@link Configuration} from supplied properties.
-   * @param addNutchResources if true, then first 
<code>nutch-default.xml</code>,
-   * and then <code>nutch-site.xml</code> will be loaded prior to applying the
-   * properties. Otherwise these resources won't be used.
-   * @param nutchProperties a set of properties to define (or override)
+
+  /**
+   * Create a {@link Configuration} from supplied properties.
+   * 
+   * @param addNutchResources
+   *          if true, then first <code>nutch-default.xml</code>, and then
+   *          <code>nutch-site.xml</code> will be loaded prior to applying the
+   *          properties. Otherwise these resources won't be used.
+   * @param nutchProperties
+   *          a set of properties to define (or override)
    */
-  public static Configuration create(boolean addNutchResources, Properties 
nutchProperties) {
+  public static Configuration create(boolean addNutchResources,
+      Properties nutchProperties) {
     Configuration conf = new Configuration();
     setUUID(conf);
     if (addNutchResources) {
@@ -83,8 +93,8 @@ public class NutchConfiguration {
   /**
    * Add the standard Nutch resources to {@link Configuration}.
    * 
-   * @param conf               Configuration object to which
-   *                           configuration is to be added.
+   * @param conf
+   *          Configuration object to which configuration is to be added.
    */
   private static Configuration addNutchResources(Configuration conf) {
     conf.addResource("nutch-default.xml");
@@ -92,4 +102,3 @@ public class NutchConfiguration {
     return conf;
   }
 }
-

Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/NutchJob.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/NutchJob.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/NutchJob.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/NutchJob.java Fri Jan  9 
06:34:33 2015
@@ -35,10 +35,10 @@ public class NutchJob extends Job {
 
   public NutchJob(Configuration conf, String jobName) throws IOException {
     super(conf, jobName);
-    //prefix jobName with crawlId if not empty
+    // prefix jobName with crawlId if not empty
     String crawlId = conf.get("storage.crawl.id");
     if (!StringUtils.isEmpty(crawlId)) {
-      jobName = "["+crawlId+"]"+jobName;
+      jobName = "[" + crawlId + "]" + jobName;
       setJobName(jobName);
     }
     setJarByClass(this.getClass());

Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/NutchJobConf.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/NutchJobConf.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/NutchJobConf.java 
(original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/NutchJobConf.java Fri Jan 
 9 06:34:33 2015
@@ -20,7 +20,7 @@ package org.apache.nutch.util;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapred.JobConf;
 
-/** A {@link JobConf} for Nutch jobs.  */
+/** A {@link JobConf} for Nutch jobs. */
 public class NutchJobConf extends JobConf {
 
   public NutchJobConf(Configuration conf) {
@@ -28,4 +28,3 @@ public class NutchJobConf extends JobCon
   }
 
 }
-

Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/NutchTool.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/NutchTool.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/NutchTool.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/NutchTool.java Fri Jan  9 
06:34:33 2015
@@ -26,19 +26,20 @@ import org.apache.hadoop.mapreduce.Job;
 import org.apache.nutch.metadata.Nutch;
 
 public abstract class NutchTool extends Configured {
-  
-  protected HashMap<String,Object> results = new HashMap<String,Object>();
-  protected Map<String,Object> status =
-    Collections.synchronizedMap(new HashMap<String,Object>());
+
+  protected HashMap<String, Object> results = new HashMap<String, Object>();
+  protected Map<String, Object> status = Collections
+      .synchronizedMap(new HashMap<String, Object>());
   protected Job currentJob;
   protected int numJobs;
   protected int currentJobNum;
-  
-  /** Runs the tool, using a map of arguments.
-   * May return results, or null.
+
+  /**
+   * Runs the tool, using a map of arguments. May return results, or null.
    */
-  public abstract Map<String,Object> run(Map<String,Object> args) throws 
Exception;
-  
+  public abstract Map<String, Object> run(Map<String, Object> args)
+      throws Exception;
+
   /** Returns relative progress of the tool, a float in range [0,1]. */
   public float getProgress() {
     float res = 0;
@@ -55,29 +56,31 @@ public abstract class NutchTool extends
     }
     // take into account multiple jobs
     if (numJobs > 1) {
-      res = (currentJobNum + res) / (float)numJobs;
+      res = (currentJobNum + res) / (float) numJobs;
     }
     status.put(Nutch.STAT_PROGRESS, res);
     return res;
   }
-  
-  
+
   /** Returns current status of the running tool. */
-  public Map<String,Object> getStatus() {
+  public Map<String, Object> getStatus() {
     return status;
   }
-  
-  /** Stop the job with the possibility to resume. Subclasses should
-   * override this, since by default it calls {@link #killJob()}.
+
+  /**
+   * Stop the job with the possibility to resume. Subclasses should override
+   * this, since by default it calls {@link #killJob()}.
+   * 
    * @return true if succeeded, false otherwise
    */
   public boolean stopJob() throws Exception {
     return killJob();
   }
-  
+
   /**
-   * Kill the job immediately. Clients should assume that any results
-   * that the job produced so far are in inconsistent state or missing.
+   * Kill the job immediately. Clients should assume that any results that the
+   * job produced so far are in inconsistent state or missing.
+   * 
    * @return true if succeeded, false otherwise.
    * @throws Exception
    */

Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/ObjectCache.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/ObjectCache.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/ObjectCache.java 
(original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/ObjectCache.java Fri Jan  
9 06:34:33 2015
@@ -24,35 +24,33 @@ import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
 
 public class ObjectCache {
-  
+
   private static final Logger LOG = LoggerFactory.getLogger(ObjectCache.class);
-  
-  private static final WeakHashMap<Configuration, ObjectCache> CACHE = 
-    new WeakHashMap<Configuration, ObjectCache>();
+
+  private static final WeakHashMap<Configuration, ObjectCache> CACHE = new 
WeakHashMap<Configuration, ObjectCache>();
 
   private final HashMap<String, Object> objectMap;
-  
+
   private ObjectCache() {
     objectMap = new HashMap<String, Object>();
   }
-  
+
   public static ObjectCache get(Configuration conf) {
     ObjectCache objectCache = CACHE.get(conf);
     if (objectCache == null) {
-      LOG.debug("No object cache found for conf=" + conf 
-                  + ", instantiating a new object cache");
+      LOG.debug("No object cache found for conf=" + conf
+          + ", instantiating a new object cache");
       objectCache = new ObjectCache();
       CACHE.put(conf, objectCache);
     }
     return objectCache;
   }
-  
+
   public Object getObject(String key) {
     return objectMap.get(key);
   }
-  
+
   public void setObject(String key, Object value) {
     objectMap.put(key, value);
   }
 }
-

Modified: 
nutch/branches/2.x/src/java/org/apache/nutch/util/PrefixStringMatcher.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/PrefixStringMatcher.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/PrefixStringMatcher.java 
(original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/PrefixStringMatcher.java 
Fri Jan  9 06:34:33 2015
@@ -21,46 +21,47 @@ import java.util.Collection;
 import java.util.Iterator;
 
 /**
- * A class for efficiently matching <code>String</code>s against a set
- * of prefixes.
+ * A class for efficiently matching <code>String</code>s against a set of
+ * prefixes.
  */
 public class PrefixStringMatcher extends TrieStringMatcher {
 
   /**
    * Creates a new <code>PrefixStringMatcher</code> which will match
-   * <code>String</code>s with any prefix in the supplied array.
-   * Zero-length <code>Strings</code> are ignored.
+   * <code>String</code>s with any prefix in the supplied array. Zero-length
+   * <code>Strings</code> are ignored.
    */
   public PrefixStringMatcher(String[] prefixes) {
     super();
-    for (int i= 0; i < prefixes.length; i++)
+    for (int i = 0; i < prefixes.length; i++)
       addPatternForward(prefixes[i]);
   }
 
   /**
    * Creates a new <code>PrefixStringMatcher</code> which will match
-   * <code>String</code>s with any prefix in the supplied    
+   * <code>String</code>s with any prefix in the supplied
    * <code>Collection</code>.
-   *
-   * @throws ClassCastException if any <code>Object</code>s in the
-   * collection are not <code>String</code>s
+   * 
+   * @throws ClassCastException
+   *           if any <code>Object</code>s in the collection are not
+   *           <code>String</code>s
    */
   public PrefixStringMatcher(Collection<String> prefixes) {
     super();
-    Iterator<String> iter= prefixes.iterator();
+    Iterator<String> iter = prefixes.iterator();
     while (iter.hasNext())
       addPatternForward(iter.next());
   }
 
   /**
-   * Returns true if the given <code>String</code> is matched by a
-   * prefix in the trie
+   * Returns true if the given <code>String</code> is matched by a prefix in 
the
+   * trie
    */
   public boolean matches(String input) {
-    TrieNode node= root;
-    for (int i= 0; i < input.length(); i++) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    for (int i = 0; i < input.length(); i++) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         return false;
       if (node.isTerminal())
         return true;
@@ -73,13 +74,13 @@ public class PrefixStringMatcher extends
    * or <code>null<code> if no match exists.
    */
   public String shortestMatch(String input) {
-    TrieNode node= root;
-    for (int i= 0; i < input.length(); i++) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    for (int i = 0; i < input.length(); i++) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         return null;
       if (node.isTerminal())
-        return input.substring(0, i+1);
+        return input.substring(0, i + 1);
     }
     return null;
   }
@@ -89,29 +90,26 @@ public class PrefixStringMatcher extends
    * or <code>null<code> if no match exists.
    */
   public String longestMatch(String input) {
-    TrieNode node= root;
-    String result= null;
-    for (int i= 0; i < input.length(); i++) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    String result = null;
+    for (int i = 0; i < input.length(); i++) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         break;
       if (node.isTerminal())
-        result= input.substring(0, i+1);
+        result = input.substring(0, i + 1);
     }
     return result;
   }
 
   public static final void main(String[] argv) {
-    PrefixStringMatcher matcher= 
-      new PrefixStringMatcher( 
-        new String[] 
-        {"abcd", "abc", "aac", "baz", "foo", "foobar"} );
-
-    String[] tests= {"a", "ab", "abc", "abcdefg", "apple", "aa", "aac",
-                     "aaccca", "abaz", "baz", "bazooka", "fo", "foobar",
-                     "kite", };
+    PrefixStringMatcher matcher = new PrefixStringMatcher(new String[] {
+        "abcd", "abc", "aac", "baz", "foo", "foobar" });
 
-    for (int i= 0; i < tests.length; i++) {
+    String[] tests = { "a", "ab", "abc", "abcdefg", "apple", "aa", "aac",
+        "aaccca", "abaz", "baz", "bazooka", "fo", "foobar", "kite", };
+
+    for (int i = 0; i < tests.length; i++) {
       System.out.println("testing: " + tests[i]);
       System.out.println("   matches: " + matcher.matches(tests[i]));
       System.out.println("  shortest: " + matcher.shortestMatch(tests[i]));

Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/StringUtil.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/StringUtil.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/StringUtil.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/StringUtil.java Fri Jan  
9 06:34:33 2015
@@ -20,42 +20,42 @@ package org.apache.nutch.util;
 import java.nio.ByteBuffer;
 
 /**
- * A collection of String processing utility methods. 
+ * A collection of String processing utility methods.
  */
 public class StringUtil {
 
   /**
-   * Returns a copy of <code>s</code> padded with trailing spaces so
-   * that it's length is <code>length</code>.  Strings already
-   * <code>length</code> characters long or longer are not altered.
+   * Returns a copy of <code>s</code> padded with trailing spaces so that it's
+   * length is <code>length</code>. Strings already <code>length</code>
+   * characters long or longer are not altered.
    */
   public static String rightPad(String s, int length) {
-    StringBuffer sb= new StringBuffer(s);
-    for (int i= length - s.length(); i > 0; i--) 
+    StringBuffer sb = new StringBuffer(s);
+    for (int i = length - s.length(); i > 0; i--)
       sb.append(" ");
     return sb.toString();
   }
 
   /**
-   * Returns a copy of <code>s</code> padded with leading spaces so
-   * that it's length is <code>length</code>.  Strings already
-   * <code>length</code> characters long or longer are not altered.
+   * Returns a copy of <code>s</code> padded with leading spaces so that it's
+   * length is <code>length</code>. Strings already <code>length</code>
+   * characters long or longer are not altered.
    */
   public static String leftPad(String s, int length) {
-    StringBuffer sb= new StringBuffer();
-    for (int i= length - s.length(); i > 0; i--) 
+    StringBuffer sb = new StringBuffer();
+    for (int i = length - s.length(); i > 0; i--)
       sb.append(" ");
     sb.append(s);
     return sb.toString();
   }
 
-
-  private static final char[] HEX_DIGITS =
-  {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'};
+  private static final char[] HEX_DIGITS = { '0', '1', '2', '3', '4', '5', '6',
+      '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
 
   /**
    * Convenience call for {@link #toHexString(ByteBuffer, String, int)}, where
    * <code>sep = null; lineLen = Integer.MAX_VALUE</code>.
+   * 
    * @param buf
    */
   public static String toHexString(ByteBuffer buf) {
@@ -65,19 +65,25 @@ public class StringUtil {
   /**
    * Get a text representation of a ByteBuffer as hexadecimal String, where 
each
    * pair of hexadecimal digits corresponds to consecutive bytes in the array.
-   * @param buf input data
-   * @param sep separate every pair of hexadecimal digits with this separator, 
or
-   * null if no separation is needed.
-   * @param lineLen break the output String into lines containing output for 
lineLen
-   * bytes.
+   * 
+   * @param buf
+   *          input data
+   * @param sep
+   *          separate every pair of hexadecimal digits with this separator, or
+   *          null if no separation is needed.
+   * @param lineLen
+   *          break the output String into lines containing output for lineLen
+   *          bytes.
    */
   public static String toHexString(ByteBuffer buf, String sep, int lineLen) {
-    return toHexString(buf.array(), buf.arrayOffset() + buf.position(), 
buf.remaining(), sep, lineLen);
+    return toHexString(buf.array(), buf.arrayOffset() + buf.position(),
+        buf.remaining(), sep, lineLen);
   }
 
   /**
    * Convenience call for {@link #toHexString(byte[], String, int)}, where
    * <code>sep = null; lineLen = Integer.MAX_VALUE</code>.
+   * 
    * @param buf
    */
   public static String toHexString(byte[] buf) {
@@ -87,11 +93,15 @@ public class StringUtil {
   /**
    * Get a text representation of a byte[] as hexadecimal String, where each
    * pair of hexadecimal digits corresponds to consecutive bytes in the array.
-   * @param buf input data
-   * @param sep separate every pair of hexadecimal digits with this separator, 
or
-   * null if no separation is needed.
-   * @param lineLen break the output String into lines containing output for 
lineLen
-   * bytes.
+   * 
+   * @param buf
+   *          input data
+   * @param sep
+   *          separate every pair of hexadecimal digits with this separator, or
+   *          null if no separation is needed.
+   * @param lineLen
+   *          break the output String into lines containing output for lineLen
+   *          bytes.
    */
   public static String toHexString(byte[] buf, String sep, int lineLen) {
     return toHexString(buf, 0, buf.length, sep, lineLen);
@@ -100,39 +110,53 @@ public class StringUtil {
   /**
    * Get a text representation of a byte[] as hexadecimal String, where each
    * pair of hexadecimal digits corresponds to consecutive bytes in the array.
-   * @param buf input data
-   * @param of the offset into the byte[] to start reading
-   * @param cb the number of bytes to read from the byte[]
-   * @param sep separate every pair of hexadecimal digits with this separator, 
or
-   * null if no separation is needed.
-   * @param lineLen break the output String into lines containing output for 
lineLen
-   * bytes.
-   */
-  public static String toHexString(byte[] buf, int of, int cb, String sep, int 
lineLen) {
-    if (buf == null) return null;
-    if (lineLen <= 0) lineLen = Integer.MAX_VALUE;
+   * 
+   * @param buf
+   *          input data
+   * @param of
+   *          the offset into the byte[] to start reading
+   * @param cb
+   *          the number of bytes to read from the byte[]
+   * @param sep
+   *          separate every pair of hexadecimal digits with this separator, or
+   *          null if no separation is needed.
+   * @param lineLen
+   *          break the output String into lines containing output for lineLen
+   *          bytes.
+   */
+  public static String toHexString(byte[] buf, int of, int cb, String sep,
+      int lineLen) {
+    if (buf == null)
+      return null;
+    if (lineLen <= 0)
+      lineLen = Integer.MAX_VALUE;
     StringBuffer res = new StringBuffer(cb * 2);
     for (int c = 0; c < cb; c++) {
       int b = buf[of++];
       res.append(HEX_DIGITS[(b >> 4) & 0xf]);
       res.append(HEX_DIGITS[b & 0xf]);
-      if (c > 0 && (c % lineLen) == 0) res.append('\n');
-      else if (sep != null && c < lineLen - 1) res.append(sep);
+      if (c > 0 && (c % lineLen) == 0)
+        res.append('\n');
+      else if (sep != null && c < lineLen - 1)
+        res.append(sep);
     }
     return res.toString();
   }
-  
+
   /**
    * Convert a String containing consecutive (no inside whitespace) hexadecimal
-   * digits into a corresponding byte array. If the number of digits is not 
even,
-   * a '0' will be appended in the front of the String prior to conversion.
-   * Leading and trailing whitespace is ignored.
-   * @param text input text
+   * digits into a corresponding byte array. If the number of digits is not
+   * even, a '0' will be appended in the front of the String prior to
+   * conversion. Leading and trailing whitespace is ignored.
+   * 
+   * @param text
+   *          input text
    * @return converted byte array, or null if unable to convert
    */
   public static byte[] fromHexString(String text) {
     text = text.trim();
-    if (text.length() % 2 != 0) text = "0" + text;
+    if (text.length() % 2 != 0)
+      text = "0" + text;
     int resLen = text.length() / 2;
     int loNibble, hiNibble;
     byte[] res = new byte[resLen];
@@ -140,12 +164,13 @@ public class StringUtil {
       int j = i << 1;
       hiNibble = charToNibble(text.charAt(j));
       loNibble = charToNibble(text.charAt(j + 1));
-      if (loNibble == -1 || hiNibble == -1) return null;
-      res[i] = (byte)(hiNibble << 4 | loNibble);
+      if (loNibble == -1 || hiNibble == -1)
+        return null;
+      res[i] = (byte) (hiNibble << 4 | loNibble);
     }
     return res;
   }
-  
+
   private static final int charToNibble(char c) {
     if (c >= '0' && c <= '9') {
       return c - '0';
@@ -164,11 +189,12 @@ public class StringUtil {
   public static boolean isEmpty(String str) {
     return (str == null) || (str.equals(""));
   }
-  
 
   /**
    * Takes in a String value and cleans out any offending "ï¿½"
-   * @param value the dirty String value.
+   * 
+   * @param value
+   *          the dirty String value.
    * @return clean String
    */
   public static String cleanField(String value) {
@@ -178,8 +204,8 @@ public class StringUtil {
   public static void main(String[] args) {
     if (args.length != 1)
       System.out.println("Usage: StringUtil <encoding name>");
-    else 
-      System.out.println(args[0] + " is resolved to " +
-                         EncodingDetector.resolveEncodingAlias(args[0]));
+    else
+      System.out.println(args[0] + " is resolved to "
+          + EncodingDetector.resolveEncodingAlias(args[0]));
   }
 }

Modified: 
nutch/branches/2.x/src/java/org/apache/nutch/util/SuffixStringMatcher.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/SuffixStringMatcher.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/SuffixStringMatcher.java 
(original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/SuffixStringMatcher.java 
Fri Jan  9 06:34:33 2015
@@ -21,8 +21,8 @@ import java.util.Collection;
 import java.util.Iterator;
 
 /**
- * A class for efficiently matching <code>String</code>s against a set
- * of suffixes.  Zero-length <code>Strings</code> are ignored.
+ * A class for efficiently matching <code>String</code>s against a set of
+ * suffixes. Zero-length <code>Strings</code> are ignored.
  */
 public class SuffixStringMatcher extends TrieStringMatcher {
 
@@ -32,7 +32,7 @@ public class SuffixStringMatcher extends
    */
   public SuffixStringMatcher(String[] suffixes) {
     super();
-    for (int i= 0; i < suffixes.length; i++)
+    for (int i = 0; i < suffixes.length; i++)
       addPatternBackward(suffixes[i]);
   }
 
@@ -49,14 +49,14 @@ public class SuffixStringMatcher extends
   }
 
   /**
-   * Returns true if the given <code>String</code> is matched by a
-   * suffix in the trie
+   * Returns true if the given <code>String</code> is matched by a suffix in 
the
+   * trie
    */
   public boolean matches(String input) {
-    TrieNode node= root;
-    for (int i= input.length() - 1; i >= 0; i--) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    for (int i = input.length() - 1; i >= 0; i--) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         return false;
       if (node.isTerminal())
         return true;
@@ -64,16 +64,15 @@ public class SuffixStringMatcher extends
     return false;
   }
 
-
   /**
    * Returns the shortest suffix of <code>input<code> that is matched,
    * or <code>null<code> if no match exists.
    */
   public String shortestMatch(String input) {
-    TrieNode node= root;
-    for (int i= input.length() - 1; i >= 0; i--) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    for (int i = input.length() - 1; i >= 0; i--) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         return null;
       if (node.isTerminal())
         return input.substring(i);
@@ -86,29 +85,26 @@ public class SuffixStringMatcher extends
    * or <code>null<code> if no match exists.
    */
   public String longestMatch(String input) {
-    TrieNode node= root;
-    String result= null;
-    for (int i= input.length() - 1; i >= 0; i--) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    String result = null;
+    for (int i = input.length() - 1; i >= 0; i--) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         break;
       if (node.isTerminal())
-        result= input.substring(i);
+        result = input.substring(i);
     }
     return result;
   }
 
   public static final void main(String[] argv) {
-    SuffixStringMatcher matcher= 
-      new SuffixStringMatcher( 
-        new String[] 
-        {"a", "abcd", "bcd", "bcdefg", "defg", "aac", "baz", "foo", "foobar"} 
);
-
-    String[] tests= {"a", "ac", "abcd", "abcdefg", "apple", "aa", "aac",
-                    "aaccca", "abaz", "baz", "bazooka", "fo", "foobar",
-                    "kite", };
+    SuffixStringMatcher matcher = new SuffixStringMatcher(new String[] { "a",
+        "abcd", "bcd", "bcdefg", "defg", "aac", "baz", "foo", "foobar" });
+
+    String[] tests = { "a", "ac", "abcd", "abcdefg", "apple", "aa", "aac",
+        "aaccca", "abaz", "baz", "bazooka", "fo", "foobar", "kite", };
 
-    for (int i= 0; i < tests.length; i++) {
+    for (int i = 0; i < tests.length; i++) {
       System.out.println("testing: " + tests[i]);
       System.out.println("   matches: " + matcher.matches(tests[i]));
       System.out.println("  shortest: " + matcher.shortestMatch(tests[i]));

Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/TableUtil.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/TableUtil.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/TableUtil.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/TableUtil.java Fri Jan  9 
06:34:33 2015
@@ -33,7 +33,7 @@ public class TableUtil {
    * <p>
    * E.g. "http://bar.foo.com:8983/to/index.html?a=b"; becomes
    * "com.foo.bar:8983:http/to/index.html?a=b".
-   *
+   * 
    * @param url
    *          url to be reversed
    * @return Reversed url
@@ -50,7 +50,7 @@ public class TableUtil {
    * <p>
    * E.g. "http://bar.foo.com:8983/to/index.html?a=b"; becomes
    * "com.foo.bar:http:8983/to/index.html?a=b".
-   *
+   * 
    * @param url
    *          url to be reversed
    * @return Reversed url
@@ -93,8 +93,11 @@ public class TableUtil {
       pathBegin = reversedUrl.length();
     String sub = reversedUrl.substring(0, pathBegin);
 
-    String[] splits = StringUtils.splitPreserveAllTokens(sub, ':'); // 
{<reversed host>, <port>, <protocol>}
-    
+    String[] splits = StringUtils.splitPreserveAllTokens(sub, ':'); // 
{<reversed
+                                                                    // host>,
+                                                                    // <port>,
+                                                                    // 
<protocol>}
+
     buf.append(splits[1]); // add protocol
     buf.append("://");
     reverseAppendSplits(splits[0], buf); // splits[0] is reversed
@@ -110,7 +113,7 @@ public class TableUtil {
   /**
    * Given a reversed url, returns the reversed host E.g
    * "com.foo.bar:http:8983/to/index.html?a=b" -> "com.foo.bar"
-   *
+   * 
    * @param reversedUrl
    *          Reversed url
    * @return Reversed host
@@ -120,7 +123,7 @@ public class TableUtil {
   }
 
   private static void reverseAppendSplits(String string, StringBuilder buf) {
-    String[] splits = StringUtils.split(string,'.');
+    String[] splits = StringUtils.split(string, '.');
     if (splits.length > 0) {
       for (int i = splits.length - 1; i > 0; i--) {
         buf.append(splits[i]);
@@ -136,18 +139,18 @@ public class TableUtil {
     StringBuilder buf = new StringBuilder();
     reverseAppendSplits(hostName, buf);
     return buf.toString();
-    
+
   }
+
   public static String unreverseHost(String reversedHostName) {
     return reverseHost(reversedHostName); // Reversible
   }
-  
-  
+
   /**
-   * Convert given Utf8 instance to String and and cleans out 
-   * any offending "ï¿½" from the String.
-   *
-   *
+   * Convert given Utf8 instance to String and and cleans out any offending 
"ï¿½"
+   * from the String.
+   * 
+   * 
    * @param utf8
    *          Utf8 object
    * @return string-ifed Utf8 object or null if Utf8 instance is null

Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/TimingUtil.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/TimingUtil.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/TimingUtil.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/TimingUtil.java Fri Jan  
9 06:34:33 2015
@@ -21,35 +21,39 @@ import java.text.NumberFormat;
 
 public class TimingUtil {
 
-    private static long[] TIME_FACTOR = { 60 * 60 * 1000, 60 * 1000, 1000 };
+  private static long[] TIME_FACTOR = { 60 * 60 * 1000, 60 * 1000, 1000 };
 
-    /**
-     * Calculate the elapsed time between two times specified in milliseconds.
-     * @param start The start of the time period
-     * @param end The end of the time period
-     * @return a string of the form "XhYmZs" when the elapsed time is X hours, 
Y minutes and Z seconds or null if start > end.
-     */
-    public static String elapsedTime(long start, long end){
-        if (start > end) {
-            return null;
-        }
-
-        long[] elapsedTime = new long[TIME_FACTOR.length];
-
-        for (int i = 0; i < TIME_FACTOR.length; i++) {
-            elapsedTime[i] = start > end ? -1 : (end - start) / TIME_FACTOR[i];
-            start += TIME_FACTOR[i] * elapsedTime[i];
-        }
-
-        NumberFormat nf = NumberFormat.getInstance();
-        nf.setMinimumIntegerDigits(2);
-        StringBuffer buf = new StringBuffer();
-        for (int i = 0; i < elapsedTime.length; i++) {
-            if (i > 0) {
-                buf.append(":");
-            }
-            buf.append(nf.format(elapsedTime[i]));
-        }
-        return buf.toString();
+  /**
+   * Calculate the elapsed time between two times specified in milliseconds.
+   * 
+   * @param start
+   *          The start of the time period
+   * @param end
+   *          The end of the time period
+   * @return a string of the form "XhYmZs" when the elapsed time is X hours, Y
+   *         minutes and Z seconds or null if start > end.
+   */
+  public static String elapsedTime(long start, long end) {
+    if (start > end) {
+      return null;
     }
+
+    long[] elapsedTime = new long[TIME_FACTOR.length];
+
+    for (int i = 0; i < TIME_FACTOR.length; i++) {
+      elapsedTime[i] = start > end ? -1 : (end - start) / TIME_FACTOR[i];
+      start += TIME_FACTOR[i] * elapsedTime[i];
+    }
+
+    NumberFormat nf = NumberFormat.getInstance();
+    nf.setMinimumIntegerDigits(2);
+    StringBuffer buf = new StringBuffer();
+    for (int i = 0; i < elapsedTime.length; i++) {
+      if (i > 0) {
+        buf.append(":");
+      }
+      buf.append(nf.format(elapsedTime[i]));
+    }
+    return buf.toString();
+  }
 }

Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/ToolUtil.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/ToolUtil.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/ToolUtil.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/ToolUtil.java Fri Jan  9 
06:34:33 2015
@@ -28,14 +28,14 @@ import org.apache.nutch.metadata.Nutch;
 
 public class ToolUtil {
 
-  public static final Map<String,Object> toArgMap(Object... args) {
+  public static final Map<String, Object> toArgMap(Object... args) {
     if (args == null) {
       return null;
     }
     if (args.length % 2 != 0) {
       throw new RuntimeException("expected pairs of argName argValue");
     }
-    HashMap<String,Object> res = new HashMap<String,Object>();
+    HashMap<String, Object> res = new HashMap<String, Object>();
     for (int i = 0; i < args.length; i += 2) {
       if (args[i + 1] != null) {
         res.put(String.valueOf(args[i]), args[i + 1]);
@@ -43,20 +43,22 @@ public class ToolUtil {
     }
     return res;
   }
-  
+
   @SuppressWarnings("unchecked")
-  public static final void recordJobStatus(String label, Job job, 
Map<String,Object> results) {
-    Map<String,Object> jobs = (Map<String,Object>)results.get(Nutch.STAT_JOBS);
+  public static final void recordJobStatus(String label, Job job,
+      Map<String, Object> results) {
+    Map<String, Object> jobs = (Map<String, Object>) results
+        .get(Nutch.STAT_JOBS);
     if (jobs == null) {
-      jobs = new LinkedHashMap<String,Object>();
+      jobs = new LinkedHashMap<String, Object>();
       results.put(Nutch.STAT_JOBS, jobs);
     }
-    Map<String,Object> stats = new HashMap<String,Object>();
-    Map<String,Object> countStats = new HashMap<String,Object>();
+    Map<String, Object> stats = new HashMap<String, Object>();
+    Map<String, Object> countStats = new HashMap<String, Object>();
     try {
       Counters counters = job.getCounters();
       for (CounterGroup cg : counters) {
-        Map<String,Object> cnts = new HashMap<String,Object>();
+        Map<String, Object> cnts = new HashMap<String, Object>();
         countStats.put(cg.getDisplayName(), cnts);
         for (Counter c : cg) {
           cnts.put(c.getName(), c.getValue());

svn commit: r1650447 [12/25] - in /nutch/branches/2.x: ./ src/java/org/apache/nutch/api/ src/java/org/apache/nutch/api/impl/ src/java/org/apache/nutch/api/impl/db/ src/java/org/apache/nutch/api/model/response/ src/java/org/apache/nutch/api/resources/ s...

Reply via email to