pr...

lewismc Wed, 28 Jan 2015 21:39:34 -0800

Modified: nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java Thu Jan 29 
05:38:59 2015
@@ -38,18 +38,18 @@ import org.apache.nutch.plugin.PluginRep
 import org.apache.nutch.util.MimeUtil;
 import org.apache.nutch.util.ObjectCache;
 
-
-/** Creates and caches {@link Parser} plugins.*/
+/** Creates and caches {@link Parser} plugins. */
 public final class ParserFactory {
-  
+
   public static final Logger LOG = LoggerFactory.getLogger(ParserFactory.class);
-  
+
   /** Wildcard for default plugins. */
   public static final String DEFAULT_PLUGIN = "*";
-  
+
   /** Empty extension list for caching purposes. */
-  private final List<Extension> EMPTY_EXTENSION_LIST = Collections.<Extension>emptyList();
-  
+  private final List<Extension> EMPTY_EXTENSION_LIST = Collections
+      .<Extension> emptyList();
+
   private Configuration conf;
   private ExtensionPoint extensionPoint;
   private ParsePluginList parsePluginList;
@@ -57,12 +57,15 @@ public final class ParserFactory {
   public ParserFactory(Configuration conf) {
     this.conf = conf;
     ObjectCache objectCache = ObjectCache.get(conf);
-    this.extensionPoint = PluginRepository.get(conf).getExtensionPoint(Parser.X_POINT_ID);
-    this.parsePluginList = (ParsePluginList)objectCache.getObject(ParsePluginList.class.getName());
-    
+    this.extensionPoint = PluginRepository.get(conf).getExtensionPoint(
+        Parser.X_POINT_ID);
+    this.parsePluginList = (ParsePluginList) objectCache
+        .getObject(ParsePluginList.class.getName());
+
     if (this.parsePluginList == null) {
       this.parsePluginList = new ParsePluginsReader().parse(conf);
-      objectCache.setObject(ParsePluginList.class.getName(), this.parsePluginList);
+      objectCache.setObject(ParsePluginList.class.getName(),
+          this.parsePluginList);
     }
 
     if (this.extensionPoint == null) {
@@ -72,45 +75,46 @@ public final class ParserFactory {
       throw new RuntimeException(
           "Parse Plugins preferences could not be loaded.");
     }
-  }                      
-  
-   
+  }
+
   /**
    * Function returns an array of {@link Parser}s for a given content type.
-   *
+   * 
    * The function consults the internal list of parse plugins for the
-   * ParserFactory to determine the list of pluginIds, then gets the
-   * appropriate extension points to instantiate as {@link Parser}s.
-   *
-   * @param contentType The contentType to return the <code>Array</code>
-   *                    of {@link Parser}s for.
-   * @param url The url for the content that may allow us to get the type from
-   *            the file suffix.
+   * ParserFactory to determine the list of pluginIds, then gets the appropriate
+   * extension points to instantiate as {@link Parser}s.
+   * 
+   * @param contentType
+   *          The contentType to return the <code>Array</code> of {@link Parser}
+   *          s for.
+   * @param url
+   *          The url for the content that may allow us to get the type from the
+   *          file suffix.
    * @return An <code>Array</code> of {@link Parser}s for the given contentType.
    *         If there were plugins mapped to a contentType via the
-   *         <code>parse-plugins.xml</code> file, but never enabled via
-   *         the <code>plugin.includes</code> Nutch conf, then those plugins
-   *         won't be part of this array, i.e., they will be skipped.
-   *         So, if the ordered list of parsing plugins for
-   *         <code>text/plain</code> was <code>[parse-text,parse-html,
+   *         <code>parse-plugins.xml</code> file, but never enabled via the
+   *         <code>plugin.includes</code> Nutch conf, then those plugins won't
+   *         be part of this array, i.e., they will be skipped. So, if the
+   *         ordered list of parsing plugins for <code>text/plain</code> was
+   *         <code>[parse-text,parse-html,
    *         parse-rtf]</code>, and only <code>parse-html</code> and
    *         <code>parse-rtf</code> were enabled via
-   *         <code>plugin.includes</code>, then this ordered Array would
-   *         consist of two {@link Parser} interfaces,
+   *         <code>plugin.includes</code>, then this ordered Array would consist
+   *         of two {@link Parser} interfaces,
    *         <code>[parse-html, parse-rtf]</code>.
    */
   public Parser[] getParsers(String contentType, String url)
-  throws ParserNotFound {
-    
+      throws ParserNotFound {
+
     List<Parser> parsers = null;
     List<Extension> parserExts = null;
-    
+
     ObjectCache objectCache = ObjectCache.get(conf);
-    
+
     // TODO once the MimeTypes is available
     // parsers = getExtensions(MimeUtils.map(contentType));
     // if (parsers != null) {
-    //   return parsers;
+    // return parsers;
     // }
     // Last Chance: Guess content-type from file url...
     // parsers = getExtensions(MimeUtils.getMimeType(url));
@@ -121,50 +125,51 @@ public final class ParserFactory {
     }
 
     parsers = new Vector<Parser>(parserExts.size());
-    for (Iterator<Extension> i = parserExts.iterator(); i.hasNext(); ){
+    for (Iterator<Extension> i = parserExts.iterator(); i.hasNext();) {
       Extension ext = i.next();
       Parser p = null;
       try {
-        //check to see if we've cached this parser instance yet
+        // check to see if we've cached this parser instance yet
         p = (Parser) objectCache.getObject(ext.getId());
         if (p == null) {
           // go ahead and instantiate it and then cache it
           p = (Parser) ext.getExtensionInstance();
-          objectCache.setObject(ext.getId(),p);
+          objectCache.setObject(ext.getId(), p);
         }
         parsers.add(p);
       } catch (PluginRuntimeException e) {
         if (LOG.isWarnEnabled()) {
           LOG.warn("ParserFactory:PluginRuntimeException when "
-                 + "initializing parser plugin "
-                 + ext.getDescriptor().getPluginId()
-                 + " instance in getParsers "
-                 + "function: attempting to continue instantiating parsers");
+              + "initializing parser plugin "
+              + ext.getDescriptor().getPluginId() + " instance in getParsers "
+              + "function: attempting to continue instantiating parsers");
         }
       }
     }
-    return parsers.toArray(new Parser[]{});
+    return parsers.toArray(new Parser[] {});
   }
-    
+
   /**
    * Function returns a {@link Parser} instance with the specified
-   * <code>extId</code>, representing its extension ID. If the Parser
-   * instance isn't found, then the function throws a
-   * <code>ParserNotFound</code> exception. If the function is able to find
-   * the {@link Parser} in the internal <code>PARSER_CACHE</code> then it
-   * will return the already instantiated Parser. Otherwise, if it has to
-   * instantiate the Parser itself , then this function will cache that Parser
-   * in the internal <code>PARSER_CACHE</code>.
+   * <code>extId</code>, representing its extension ID. If the Parser instance
+   * isn't found, then the function throws a <code>ParserNotFound</code>
+   * exception. If the function is able to find the {@link Parser} in the
+   * internal <code>PARSER_CACHE</code> then it will return the already
+   * instantiated Parser. Otherwise, if it has to instantiate the Parser itself
+   * , then this function will cache that Parser in the internal
+   * <code>PARSER_CACHE</code>.
    * 
-   * @param id The string extension ID (e.g.,
-   *        "org.apache.nutch.parse.rss.RSSParser",
-   *        "org.apache.nutch.parse.rtf.RTFParseFactory") of the {@link Parser}
-   *        implementation to return.
+   * @param id
+   *          The string extension ID (e.g.,
+   *          "org.apache.nutch.parse.rss.RSSParser",
+   *          "org.apache.nutch.parse.rtf.RTFParseFactory") of the
+   *          {@link Parser} implementation to return.
    * @return A {@link Parser} implementation specified by the parameter
    *         <code>id</code>.
-   * @throws ParserNotFound If the Parser is not found (i.e., registered with
-   *         the extension point), or if the there a
-   *         {@link PluginRuntimeException} instantiating the {@link Parser}.
+   * @throws ParserNotFound
+   *           If the Parser is not found (i.e., registered with the extension
+   *           point), or if the there a {@link PluginRuntimeException}
+   *           instantiating the {@link Parser}.
    */
   public Parser getParserById(String id) throws ParserNotFound {
 
@@ -172,7 +177,7 @@ public final class ParserFactory {
     Extension parserExt = null;
 
     ObjectCache objectCache = ObjectCache.get(conf);
-    
+
     if (id != null) {
       parserExt = getExtension(extensions, id);
     }
@@ -183,12 +188,12 @@ public final class ParserFactory {
     if (parserExt == null) {
       throw new ParserNotFound("No Parser Found for id [" + id + "]");
     }
-    
-    // first check the cache              
+
+    // first check the cache
     if (objectCache.getObject(parserExt.getId()) != null) {
       return (Parser) objectCache.getObject(parserExt.getId());
 
-    // if not found in cache, instantiate the Parser    
+      // if not found in cache, instantiate the Parser
     } else {
       try {
         Parser p = (Parser) parserExt.getExtensionInstance();
@@ -196,31 +201,31 @@ public final class ParserFactory {
         return p;
       } catch (PluginRuntimeException e) {
         if (LOG.isWarnEnabled()) {
-          LOG.warn("Canno initialize parser " +
-                   parserExt.getDescriptor().getPluginId() +
-                   " (cause: " + e.toString());
+          LOG.warn("Canno initialize parser "
+              + parserExt.getDescriptor().getPluginId() + " (cause: "
+              + e.toString());
         }
         throw new ParserNotFound("Cannot init parser for id [" + id + "]");
       }
     }
   }
-  
+
   /**
    * Finds the best-suited parse plugin for a given contentType.
    * 
-   * @param contentType Content-Type for which we seek a parse plugin.
-   * @return a list of extensions to be used for this contentType.
-   *         If none, returns <code>null</code>.
+   * @param contentType
+   *          Content-Type for which we seek a parse plugin.
+   * @return a list of extensions to be used for this contentType. If none,
+   *         returns <code>null</code>.
    */
   @SuppressWarnings("unchecked")
   protected List<Extension> getExtensions(String contentType) {
-    
+
     ObjectCache objectCache = ObjectCache.get(conf);
     // First of all, tries to clean the content-type
     String type = null;
     type = MimeUtil.cleanMimeType(contentType);
 
-
     List<Extension> extensions = (List<Extension>) objectCache.getObject(type);
 
     // Just compare the reference:
@@ -228,100 +233,105 @@ public final class ParserFactory {
     if (extensions == EMPTY_EXTENSION_LIST) {
       return null;
     }
-    
+
     if (extensions == null) {
       extensions = findExtensions(type);
       if (extensions != null) {
         objectCache.setObject(type, extensions);
       } else {
-       // Put the empty extension list into cache
-       // to remember we don't know any related extension.
+        // Put the empty extension list into cache
+        // to remember we don't know any related extension.
         objectCache.setObject(type, EMPTY_EXTENSION_LIST);
       }
     }
     return extensions;
   }
-  
+
   /**
    * searches a list of suitable parse plugins for the given contentType.
-   * <p>It first looks for a preferred plugin defined in the parse-plugin
-   * file.  If none is found, it returns a list of default plugins.
+   * <p>
+   * It first looks for a preferred plugin defined in the parse-plugin file. If
+   * none is found, it returns a list of default plugins.
    * 
-   * @param contentType Content-Type for which we seek a parse plugin.
-   * @return List - List of extensions to be used for this contentType.
-   *                If none, returns null.
+   * @param contentType
+   *          Content-Type for which we seek a parse plugin.
+   * @return List - List of extensions to be used for this contentType. If none,
+   *         returns null.
    */
   private List<Extension> findExtensions(String contentType) {
-    
+
     Extension[] extensions = this.extensionPoint.getExtensions();
-    
+
     // Look for a preferred plugin.
-    List<String> parsePluginList =
-      this.parsePluginList.getPluginList(contentType);
-    List<Extension> extensionList =
-      matchExtensions(parsePluginList, extensions, contentType);
+    List<String> parsePluginList = this.parsePluginList
+        .getPluginList(contentType);
+    List<Extension> extensionList = matchExtensions(parsePluginList,
+        extensions, contentType);
     if (extensionList != null) {
       return extensionList;
     }
-    
+
     // If none found, look for a default plugin.
     parsePluginList = this.parsePluginList.getPluginList(DEFAULT_PLUGIN);
     return matchExtensions(parsePluginList, extensions, DEFAULT_PLUGIN);
   }
-  
+
   /**
    * Tries to find a suitable parser for the given contentType.
    * <ol>
-   * <li>It checks if a parser which accepts the contentType
-   * can be found in the <code>plugins</code> list;</li>
-   * <li>If this list is empty, it tries to find amongst the loaded
-   * extensions whether some of them might suit and warns the user.</li>
+   * <li>It checks if a parser which accepts the contentType can be found in the
+   * <code>plugins</code> list;</li>
+   * <li>If this list is empty, it tries to find amongst the loaded extensions
+   * whether some of them might suit and warns the user.</li>
    * </ol>
-   * @param plugins List of candidate plugins.
-   * @param extensions Array of loaded extensions.
-   * @param contentType Content-Type for which we seek a parse plugin.
-   * @return List - List of extensions to be used for this contentType.
-   *                If none, returns null.
+   * 
+   * @param plugins
+   *          List of candidate plugins.
+   * @param extensions
+   *          Array of loaded extensions.
+   * @param contentType
+   *          Content-Type for which we seek a parse plugin.
+   * @return List - List of extensions to be used for this contentType. If none,
+   *         returns null.
    */
   private List<Extension> matchExtensions(List<String> plugins,
-                               Extension[] extensions,
-                               String contentType) {
-    
+      Extension[] extensions, String contentType) {
+
     List<Extension> extList = new ArrayList<Extension>();
     if (plugins != null) {
-      
+
       for (String parsePluginId : plugins) {
-        
+
         Extension ext = getExtension(extensions, parsePluginId, contentType);
         // the extension returned may be null
         // that means that it was not enabled in the plugin.includes
         // nutch conf property, but it was mapped in the
         // parse-plugins.xml
-        // file. 
+        // file.
         // OR it was enabled in plugin.includes, but the plugin's plugin.xml
         // file does not claim that the plugin supports the specified mimeType
         // in either case, LOG the appropriate error message to WARN level
-        
+
         if (ext == null) {
-          //try to get it just by its pluginId
+          // try to get it just by its pluginId
           ext = getExtension(extensions, parsePluginId);
-          
-          if (LOG.isWarnEnabled()) { 
+
+          if (LOG.isWarnEnabled()) {
             if (ext != null) {
               // plugin was enabled via plugin.includes
               // its plugin.xml just doesn't claim to support that
               // particular mimeType
-              LOG.warn("ParserFactory:Plugin: " + parsePluginId +
-                       " mapped to contentType " + contentType +
-                       " via parse-plugins.xml, but " + "its plugin.xml " +
-                       "file does not claim to support contentType: " +
-                       contentType);
+              LOG.warn("ParserFactory:Plugin: " + parsePluginId
+                  + " mapped to contentType " + contentType
+                  + " via parse-plugins.xml, but " + "its plugin.xml "
+                  + "file does not claim to support contentType: "
+                  + contentType);
             } else {
               // plugin wasn't enabled via plugin.includes
-              LOG.warn("ParserFactory: Plugin: " + parsePluginId + 
-                       " mapped to contentType " + contentType +
-                       " via parse-plugins.xml, but not enabled via " +
-                       "plugin.includes in nutch-default.xml");                
     
+              LOG.warn("ParserFactory: Plugin: " + parsePluginId
+                  + " mapped to contentType " + contentType
+                  + " via parse-plugins.xml, but not enabled via "
+                  + "plugin.includes in nutch-default.xml");
             }
           }
         }
@@ -331,7 +341,7 @@ public final class ParserFactory {
           extList.add(ext);
         }
       }
-      
+
     } else {
       // okay, there were no list of plugins defined for
       // this mimeType, however, there may be plugins registered
@@ -340,75 +350,78 @@ public final class ParserFactory {
       // so, iterate through the list of extensions and if you find
       // any extensions where this is the case, throw a
       // NotMappedParserException
-      
-      for (int i=0; i<extensions.length; i++) {
-       if ("*".equals(extensions[i].getAttribute("contentType"))){
+
+      for (int i = 0; i < extensions.length; i++) {
+        if ("*".equals(extensions[i].getAttribute("contentType"))) {
           extList.add(0, extensions[i]);
-        }
-        else if (extensions[i].getAttribute("contentType") != null
-            && contentType.matches(escapeContentType(extensions[i].getAttribute("contentType"))) {
+        } else if (extensions[i].getAttribute("contentType") != null
+            && contentType.matches(escapeContentType(extensions[i]
+                .getAttribute("contentType")))) {
           extList.add(extensions[i]);
         }
       }
-      
+
       if (extList.size() > 0) {
         if (LOG.isInfoEnabled()) {
           StringBuffer extensionsIDs = new StringBuffer("[");
           boolean isFirst = true;
-          for (Extension ext : extList){
-                 if (!isFirst) extensionsIDs.append(" - ");
-                 else isFirst=false;
-                 extensionsIDs.append(ext.getId());
+          for (Extension ext : extList) {
+            if (!isFirst)
+              extensionsIDs.append(" - ");
+            else
+              isFirst = false;
+            extensionsIDs.append(ext.getId());
           }
-         extensionsIDs.append("]");
-          LOG.info("The parsing plugins: " + extensionsIDs.toString() +
-                   " are enabled via the plugin.includes system " +
-                   "property, and all claim to support the content type " +
-                   contentType + ", but they are not mapped to it  in the " +
-                   "parse-plugins.xml file");
+          extensionsIDs.append("]");
+          LOG.info("The parsing plugins: " + extensionsIDs.toString()
+              + " are enabled via the plugin.includes system "
+              + "property, and all claim to support the content type "
+              + contentType + ", but they are not mapped to it  in the "
+              + "parse-plugins.xml file");
         }
       } else if (LOG.isDebugEnabled()) {
-        LOG.debug("ParserFactory:No parse plugins mapped or enabled for " +
-                  "contentType " + contentType);
+        LOG.debug("ParserFactory:No parse plugins mapped or enabled for "
+            + "contentType " + contentType);
       }
     }
-    
+
     return (extList.size() > 0) ? extList : null;
   }
-  
+
   private String escapeContentType(String contentType) {
-       // Escapes contentType in order to use as a regex 
-       // (and keep backwards compatibility).
-       // This enables to accept multiple types for a single parser. 
-       return contentType.replace("+", "\\+").replace(".", "\\.");
-       }
+    // Escapes contentType in order to use as a regex
+    // (and keep backwards compatibility).
+    // This enables to accept multiple types for a single parser.
+    return contentType.replace("+", "\\+").replace(".", "\\.");
+  }
 
   private boolean match(Extension extension, String id, String type) {
-    return ((id.equals(extension.getId())) &&
-            (extension.getAttribute("contentType").equals("*") || 
-             type.matches(escapeContentType(extension.getAttribute("contentType"))) ||
-             type.equals(DEFAULT_PLUGIN)));
+    return ((id.equals(extension.getId())) && (extension.getAttribute(
+        "contentType").equals("*")
+        || type
+            .matches(escapeContentType(extension.getAttribute("contentType"))) || type
+          .equals(DEFAULT_PLUGIN)));
   }
-  
+
   /** Get an extension from its id and supported content-type. */
   private Extension getExtension(Extension[] list, String id, String type) {
-    for (int i=0; i<list.length; i++) {
+    for (int i = 0; i < list.length; i++) {
       if (match(list[i], id, type)) {
         return list[i];
       }
     }
     return null;
   }
-    
+
   private Extension getExtension(Extension[] list, String id) {
-    for (int i=0; i<list.length; i++) {
+    for (int i = 0; i < list.length; i++) {
       if (id.equals(list[i].getId())) {
         return list[i];
       }
     }
     return null;
   }
-  
+
   private Extension getExtensionFromAlias(Extension[] list, String id) {
     return getExtension(list, parsePluginList.getAliases().get(id));
   }


Modified: nutch/trunk/src/java/org/apache/nutch/parse/ParserNotFound.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/ParserNotFound.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/parse/ParserNotFound.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/parse/ParserNotFound.java Thu Jan 29 
05:38:59 2015
@@ -18,17 +18,17 @@ package org.apache.nutch.parse;
 
 public class ParserNotFound extends ParseException {
 
-  private static final long serialVersionUID=23993993939L;
+  private static final long serialVersionUID = 23993993939L;
   private String url;
   private String contentType;
 
-  public ParserNotFound(String message){
-    super(message);    
+  public ParserNotFound(String message) {
+    super(message);
   }
-  
+
   public ParserNotFound(String url, String contentType) {
-    this(url, contentType,
-         "parser not found for contentType="+contentType+" url="+url);
+    this(url, contentType, "parser not found for contentType=" + contentType
+        + " url=" + url);
   }
 
   public ParserNotFound(String url, String contentType, String message) {
@@ -37,6 +37,11 @@ public class ParserNotFound extends Pars
     this.contentType = contentType;
   }
 
-  public String getUrl() { return url; }
-  public String getContentType() { return contentType; }
+  public String getUrl() {
+    return url;
+  }
+
+  public String getContentType() {
+    return contentType;
+  }
 }

Modified: nutch/trunk/src/java/org/apache/nutch/parse/package-info.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/package-info.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/parse/package-info.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/parse/package-info.java Thu Jan 29 
05:38:59 2015
@@ -19,3 +19,4 @@
  * The {@link org.apache.nutch.parse.Parse Parse} interface and related classes.
  */
 package org.apache.nutch.parse;
+

Modified: 
nutch/trunk/src/java/org/apache/nutch/plugin/CircularDependencyException.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/plugin/CircularDependencyException.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- 
nutch/trunk/src/java/org/apache/nutch/plugin/CircularDependencyException.java 
(original)
+++ 
nutch/trunk/src/java/org/apache/nutch/plugin/CircularDependencyException.java 
Thu Jan 29 05:38:59 2015
@@ -16,7 +16,6 @@
  */
 package org.apache.nutch.plugin;
 
-
 /**
  * <code>CircularDependencyException</code> will be thrown if a circular
  * dependency is detected.

Modified: nutch/trunk/src/java/org/apache/nutch/plugin/Extension.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/plugin/Extension.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/plugin/Extension.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/plugin/Extension.java Thu Jan 29 
05:38:59 2015
@@ -94,8 +94,10 @@ public class Extension {
    * Adds a attribute and is only used until model creation at plugin system
    * start up.
    * 
-   * @param pKey a key
-   * @param pValue a value
+   * @param pKey
+   *          a key
+   * @param pValue
+   *          a value
    */
   public void addAttribute(String pKey, String pValue) {
     fAttributes.put(pKey, pValue);
@@ -105,7 +107,8 @@ public class Extension {
    * Sets the Class that implement the concret extension and is only used until
    * model creation at system start up.
    * 
-   * @param extensionClazz The extensionClasname to set
+   * @param extensionClazz
+   *          The extensionClasname to set
    */
   public void setClazz(String extensionClazz) {
     fClazz = extensionClazz;
@@ -115,7 +118,8 @@ public class Extension {
    * Sets the unique extension Id and is only used until model creation at
    * system start up.
    * 
-   * @param extensionID The extensionID to set
+   * @param extensionID
+   *          The extensionID to set
    */
   public void setId(String extensionID) {
     fId = extensionID;
@@ -147,10 +151,10 @@ public class Extension {
     // The same is in PluginRepository.getPluginInstance().
     // Suggested by Stefan Groschupf <[email protected]>
     synchronized (getId()) {
-      try {      
+      try {
         PluginRepository pluginRepository = PluginRepository.get(conf);
-        Class extensionClazz = 
-          pluginRepository.getCachedClass(fDescriptor, getClazz());
+        Class extensionClazz = pluginRepository.getCachedClass(fDescriptor,
+            getClazz());
         // lazy loading of Plugin in case there is no instance of the plugin
         // already.
         pluginRepository.getPluginInstance(getDescriptor());

Modified: nutch/trunk/src/java/org/apache/nutch/plugin/ExtensionPoint.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/plugin/ExtensionPoint.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/plugin/ExtensionPoint.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/plugin/ExtensionPoint.java Thu Jan 29 
05:38:59 2015
@@ -15,6 +15,7 @@
  * limitations under the License.
  */
 package org.apache.nutch.plugin;
+
 import java.util.ArrayList;
 
 /**
@@ -76,7 +77,8 @@ public class ExtensionPoint {
   /**
    * Sets the extensionPointId.
    * 
-   * @param pId extension point id
+   * @param pId
+   *          extension point id
    */
   private void setId(String pId) {
     ftId = pId;

Modified: 
nutch/trunk/src/java/org/apache/nutch/plugin/MissingDependencyException.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/plugin/MissingDependencyException.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- 
nutch/trunk/src/java/org/apache/nutch/plugin/MissingDependencyException.java 
(original)
+++ 
nutch/trunk/src/java/org/apache/nutch/plugin/MissingDependencyException.java 
Thu Jan 29 05:38:59 2015
@@ -17,8 +17,8 @@
 package org.apache.nutch.plugin;
 
 /**
- * <code>MissingDependencyException</code> will be thrown if a plugin
- * dependency cannot be found.
+ * <code>MissingDependencyException</code> will be thrown if a plugin dependency
+ * cannot be found.
  * 
  * @author J&eacute;r&ocirc;me Charron
  */

Modified: nutch/trunk/src/java/org/apache/nutch/plugin/Pluggable.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/plugin/Pluggable.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/plugin/Pluggable.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/plugin/Pluggable.java Thu Jan 29 
05:38:59 2015
@@ -17,16 +17,15 @@
 package org.apache.nutch.plugin;
 
 /**
- * Defines the capability of a class to be plugged into Nutch.
- * This is a common interface that must be implemented by all
- * Nutch Extension Points.
- *
+ * Defines the capability of a class to be plugged into Nutch. This is a common
+ * interface that must be implemented by all Nutch Extension Points.
+ * 
  * @author J&eacute;r&ocirc;me Charron
- *
+ * 
  * @see <a href="http://wiki.apache.org/nutch/AboutPlugins">About Plugins</a>
- * @see <a href="package-summary.html#package_description">
- *      plugin package description</a>
+ * @see <a href="package-summary.html#package_description"> plugin package
+ *      description</a>
  */
 public interface Pluggable {
-  
+
 }

Modified: nutch/trunk/src/java/org/apache/nutch/plugin/Plugin.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/plugin/Plugin.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/plugin/Plugin.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/plugin/Plugin.java Thu Jan 29 
05:38:59 2015
@@ -33,8 +33,8 @@ import org.apache.hadoop.conf.Configurat
  * The <code>Plugin</code> will be startuped and shutdown by the nutch plugin
  * management system.
  * 
- * A possible usecase of the <code>Plugin</code> implementation is to create
- * or close a database connection.
+ * A possible usecase of the <code>Plugin</code> implementation is to create or
+ * close a database connection.
  * 
  * @author joa23
  */
@@ -81,7 +81,8 @@ public class Plugin {
   }
 
   /**
-   * @param descriptor The descriptor to set
+   * @param descriptor
+   *          The descriptor to set
    */
   private void setDescriptor(PluginDescriptor descriptor) {
     fDescriptor = descriptor;

Modified: nutch/trunk/src/java/org/apache/nutch/plugin/PluginClassLoader.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/plugin/PluginClassLoader.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/plugin/PluginClassLoader.java 
(original)
+++ nutch/trunk/src/java/org/apache/nutch/plugin/PluginClassLoader.java Thu Jan 
29 05:38:59 2015
@@ -45,11 +45,11 @@ public class PluginClassLoader extends U
    */
   public PluginClassLoader(URL[] urls, ClassLoader parent) {
     super(urls, parent);
-    
+
     this.urls = urls;
     this.parent = parent;
   }
-  
+
   @Override
   public int hashCode() {
     final int PRIME = 31;

Modified: nutch/trunk/src/java/org/apache/nutch/plugin/PluginDescriptor.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/plugin/PluginDescriptor.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/plugin/PluginDescriptor.java 
(original)
+++ nutch/trunk/src/java/org/apache/nutch/plugin/PluginDescriptor.java Thu Jan 
29 05:38:59 2015
@@ -31,12 +31,11 @@ import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
 
 /**
- * The <code>PluginDescriptor</code> provide access to all meta information of
- * a nutch-plugin, as well to the internationalizable resources and the plugin
- * own classloader. There are meta information about <code>Plugin</code>,
- * <code>ExtensionPoint</code> and <code>Extension</code>. To provide
- * access to the meta data of a plugin via a descriptor allow a lazy loading
- * mechanism.
+ * The <code>PluginDescriptor</code> provide access to all meta information of a
+ * nutch-plugin, as well to the internationalizable resources and the plugin own
+ * classloader. There are meta information about <code>Plugin</code>,
+ * <code>ExtensionPoint</code> and <code>Extension</code>. To provide access to
+ * the meta data of a plugin via a descriptor allow a lazy loading mechanism.
  */
 public class PluginDescriptor {
   private String fPluginPath;
@@ -52,7 +51,8 @@ public class PluginDescriptor {
   private ArrayList<URL> fNotExportedLibs = new ArrayList<URL>();
   private ArrayList<Extension> fExtensions = new ArrayList<Extension>();
   private PluginClassLoader fClassLoader;
-  public static final Logger LOG = LoggerFactory.getLogger(PluginDescriptor.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(PluginDescriptor.class);
   private Configuration fConf;
 
   /**
@@ -205,18 +205,19 @@ public class PluginDescriptor {
   /**
    * Adds a dependency
    * 
-   * @param pId id of the dependent plugin
+   * @param pId
+   *          id of the dependent plugin
    */
   public void addDependency(String pId) {
     fDependencies.add(pId);
   }
 
   /**
-   * Adds a exported library with a relative path to the plugin directory.
-   * We automatically escape characters that are illegal in URLs. It is 
-   * recommended that code converts an abstract pathname into a URL by 
-   * first converting it into a URI, via the toURI method, and then 
-   * converting the URI into a URL via the URI.toURL method.
+   * Adds a exported library with a relative path to the plugin directory. We
+   * automatically escape characters that are illegal in URLs. It is recommended
+   * that code converts an abstract pathname into a URL by first converting it
+   * into a URI, via the toURI method, and then converting the URI into a URL
+   * via the URI.toURL method.
    * 
    * @param pLibPath
    */
@@ -246,11 +247,11 @@ public class PluginDescriptor {
   }
 
   /**
-   * Adds a exported library with a relative path to the plugin directory.
-   * We automatically escape characters that are illegal in URLs. It is 
-   * recommended that code converts an abstract pathname into a URL by 
-   * first converting it into a URI, via the toURI method, and then 
-   * converting the URI into a URL via the URI.toURL method.
+   * Adds a exported library with a relative path to the plugin directory. We
+   * automatically escape characters that are illegal in URLs. It is recommended
+   * that code converts an abstract pathname into a URL by first converting it
+   * into a URI, via the toURI method, and then converting the URI into a URL
+   * via the URI.toURL method.
    * 
    * @param pLibPath
    */
@@ -294,8 +295,8 @@ public class PluginDescriptor {
       LOG.debug(getPluginId() + " " + e.toString());
     }
     URL[] urls = arrayList.toArray(new URL[arrayList.size()]);
-    fClassLoader = new PluginClassLoader(urls, PluginDescriptor.class
-        .getClassLoader());
+    fClassLoader = new PluginClassLoader(urls,
+        PluginDescriptor.class.getClassLoader());
     return fClassLoader;
   }
 
@@ -317,7 +318,7 @@ public class PluginDescriptor {
     for (String id : pDescriptor.getDependencies()) {
       PluginDescriptor descriptor = PluginRepository.get(fConf)
           .getPluginDescriptor(id);
-      for (URL url: descriptor.getExportedLibUrls()) {
+      for (URL url : descriptor.getExportedLibUrls()) {
         pLibs.add(url);
       }
       collectLibs(pLibs, descriptor);

Modified: nutch/trunk/src/java/org/apache/nutch/plugin/PluginManifestParser.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/plugin/PluginManifestParser.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/plugin/PluginManifestParser.java 
(original)
+++ nutch/trunk/src/java/org/apache/nutch/plugin/PluginManifestParser.java Thu 
Jan 29 05:38:59 2015
@@ -40,8 +40,8 @@ import org.w3c.dom.NodeList;
 import org.xml.sax.SAXException;
 
 /**
- * The <code>PluginManifestParser</code> parser just parse the manifest file
- * in all plugin directories.
+ * The <code>PluginManifestParser</code> parser just parse the manifest file in
+ * all plugin directories.
  * 
  * @author joa23
  */
@@ -94,7 +94,8 @@ public class PluginManifestParser {
             PluginDescriptor p = parseManifestFile(manifestPath);
             map.put(p.getPluginId(), p);
           } catch (Exception e) {
-            LOG.warn("Error while loading plugin `" + manifestPath + "` " + e.toString());
+            LOG.warn("Error while loading plugin `" + manifestPath + "` "
+                + e.toString());
           }
         }
       }
@@ -183,7 +184,7 @@ public class PluginManifestParser {
     PluginDescriptor pluginDescriptor = new PluginDescriptor(id, version, name,
         providerName, pluginClazz, pPath, this.conf);
     LOG.debug("plugin: id=" + id + " name=" + name + " version=" + version
-          + " provider=" + providerName + "class=" + pluginClazz);
+        + " provider=" + providerName + "class=" + pluginClazz);
     parseExtension(rootElement, pluginDescriptor);
     parseExtensionPoints(rootElement, pluginDescriptor);
     parseLibraries(rootElement, pluginDescriptor);
@@ -290,8 +291,8 @@ public class PluginManifestParser {
             if (parameters != null) {
               for (int k = 0; k < parameters.getLength(); k++) {
                 Element param = (Element) parameters.item(k);
-                extension.addAttribute(param.getAttribute(ATTR_NAME), param
-                    .getAttribute("value"));
+                extension.addAttribute(param.getAttribute(ATTR_NAME),
+                    param.getAttribute("value"));
               }
             }
             pPluginDescriptor.addExtension(extension);

Modified: nutch/trunk/src/java/org/apache/nutch/plugin/PluginRepository.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/plugin/PluginRepository.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/plugin/PluginRepository.java 
(original)
+++ nutch/trunk/src/java/org/apache/nutch/plugin/PluginRepository.java Thu Jan 
29 05:38:59 2015
@@ -53,13 +53,13 @@ public class PluginRepository {
   private HashMap<String, ExtensionPoint> fExtensionPoints;
 
   private HashMap<String, Plugin> fActivatedPlugins;
-  
-  private static final Map<String, Map<PluginClassLoader, Class>> CLASS_CACHE =
-    new HashMap<String, Map<PluginClassLoader,Class>>();
+
+  private static final Map<String, Map<PluginClassLoader, Class>> CLASS_CACHE = new HashMap<String, Map<PluginClassLoader, Class>>();
 
   private Configuration conf;
 
-  public static final Logger LOG = LoggerFactory.getLogger(PluginRepository.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(PluginRepository.class);
 
   /**
    * @throws PluginRuntimeException
@@ -71,7 +71,8 @@ public class PluginRepository {
     this.conf = new Configuration(conf);
     this.auto = conf.getBoolean("plugin.auto-activation", true);
     String[] pluginFolders = conf.getStrings("plugin.folders");
-    PluginManifestParser manifestParser = new PluginManifestParser(this.conf, this);
+    PluginManifestParser manifestParser = new PluginManifestParser(this.conf,
+        this);
     Map<String, PluginDescriptor> allPlugins = manifestParser
         .parsePluginFolder(pluginFolders);
     if (allPlugins.isEmpty()) {
@@ -88,7 +89,7 @@ public class PluginRepository {
     try {
       installExtensions(fRegisteredPlugins);
     } catch (PluginRuntimeException e) {
-        LOG.error(e.toString());
+      LOG.error(e.toString());
       throw new RuntimeException(e.getMessage());
     }
     displayStatus();
@@ -115,8 +116,8 @@ public class PluginRepository {
       return;
     }
 
-    for (PluginDescriptor plugin: plugins) {
-      for(ExtensionPoint point:plugin.getExtenstionPoints()) {
+    for (PluginDescriptor plugin : plugins) {
+      for (ExtensionPoint point : plugin.getExtenstionPoints()) {
         String xpId = point.getId();
         LOG.debug("Adding extension point " + xpId);
         fExtensionPoints.put(xpId, point);
@@ -131,7 +132,7 @@ public class PluginRepository {
       throws PluginRuntimeException {
 
     for (PluginDescriptor descriptor : pRegisteredPlugins) {
-      for(Extension extension:descriptor.getExtensions()) {
+      for (Extension extension : descriptor.getExtensions()) {
         String xpId = extension.getTargetPoint();
         ExtensionPoint point = getExtensionPoint(xpId);
         if (point == null) {
@@ -159,7 +160,7 @@ public class PluginRepository {
     branch.put(plugin.getPluginId(), plugin);
 
     // Otherwise, checks each dependency
-    for(String id:plugin.getDependencies()) {
+    for (String id : plugin.getDependencies()) {
       PluginDescriptor dependency = plugins.get(id);
       if (dependency == null) {
         throw new MissingDependencyException("Missing dependency " + id
@@ -274,7 +275,8 @@ public class PluginRepository {
       // The same is in Extension.getExtensionInstance().
       // Suggested by Stefan Groschupf <[email protected]>
       synchronized (pDescriptor) {
-        Class<?> pluginClass = getCachedClass(pDescriptor, pDescriptor.getPluginClass());
+        Class<?> pluginClass = getCachedClass(pDescriptor,
+            pDescriptor.getPluginClass());
         Constructor<?> constructor = pluginClass.getConstructor(new Class<?>[] {
             PluginDescriptor.class, Configuration.class });
         Plugin plugin = (Plugin) constructor.newInstance(new Object[] {
@@ -315,9 +317,9 @@ public class PluginRepository {
       plugin.shutDown();
     }
   }
-  
+
   public Class getCachedClass(PluginDescriptor pDescriptor, String className)
-  throws ClassNotFoundException {
+      throws ClassNotFoundException {
     Map<PluginClassLoader, Class> descMap = CLASS_CACHE.get(className);
     if (descMap == null) {
       descMap = new HashMap<PluginClassLoader, Class>();
@@ -396,7 +398,7 @@ public class PluginRepository {
     }
     return map;
   }
-  
+
   /**
    * Get ordered list of plugins. Filter and normalization plugins are applied
    * in a configurable "pipeline" order, e.g., if one plugin depends on the
@@ -412,8 +414,8 @@ public class PluginRepository {
    *          property name defining plugin order
    * @return array of plugin instances
    */
-  public synchronized Object[] getOrderedPlugins(Class<?> clazz, String xPointId,
-      String orderProperty) {
+  public synchronized Object[] getOrderedPlugins(Class<?> clazz,
+      String xPointId, String orderProperty) {
     Object[] filters;
     ObjectCache objectCache = ObjectCache.get(conf);
     filters = (Object[]) objectCache.getObject(clazz.getName());

Modified: 
nutch/trunk/src/java/org/apache/nutch/plugin/PluginRuntimeException.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/plugin/PluginRuntimeException.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/plugin/PluginRuntimeException.java 
(original)
+++ nutch/trunk/src/java/org/apache/nutch/plugin/PluginRuntimeException.java 
Thu Jan 29 05:38:59 2015
@@ -16,6 +16,7 @@
  * limitations under the License.
  */
 package org.apache.nutch.plugin;
+
 /**
  * <code>PluginRuntimeException</code> will be thrown until a exception in the
  * plugin managemnt occurs.

Modified: nutch/trunk/src/java/org/apache/nutch/protocol/Content.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/protocol/Content.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/protocol/Content.java Thu Jan 29 
05:38:59 2015
@@ -42,7 +42,7 @@ import org.apache.nutch.metadata.Metadat
 import org.apache.nutch.util.MimeUtil;
 import org.apache.nutch.util.NutchConfiguration;
 
-public final class Content implements Writable{
+public final class Content implements Writable {
 
   public static final String DIR_NAME = "content";
 
@@ -121,11 +121,11 @@ public final class Content implements Wr
       metadata.readFields(in); // read meta data
       break;
     default:
-      throw new VersionMismatchException((byte)2, oldVersion);
+      throw new VersionMismatchException((byte) 2, oldVersion);
     }
 
   }
-  
+
   public final void readFields(DataInput in) throws IOException {
     metadata.clear();
     int sizeOrVersion = in.readInt();
@@ -143,14 +143,14 @@ public final class Content implements Wr
         metadata.readFields(in);
         break;
       default:
-        throw new VersionMismatchException((byte)VERSION, (byte)version);
+        throw new VersionMismatchException((byte) VERSION, (byte) version);
       }
     } else { // size
       byte[] compressed = new byte[sizeOrVersion];
       in.readFully(compressed, 0, compressed.length);
       ByteArrayInputStream deflated = new ByteArrayInputStream(compressed);
-      DataInput inflater =
-        new DataInputStream(new InflaterInputStream(deflated));
+      DataInput inflater = new DataInputStream(
+          new InflaterInputStream(deflated));
       readFieldsCompressed(inflater);
     }
   }
@@ -184,8 +184,9 @@ public final class Content implements Wr
     return url;
   }
 
-  /** The base url for relative links contained in the content.
-   * Maybe be different from url if the request redirected.
+  /**
+   * The base url for relative links contained in the content. Maybe be
+   * different from url if the request redirected.
    */
   public String getBaseUrl() {
     return base;
@@ -200,7 +201,9 @@ public final class Content implements Wr
     this.content = content;
   }
 
-  /** The media type of the retrieved content.
+  /**
+   * The media type of the retrieved content.
+   * 
   * @see <a href="http://www.iana.org/assignments/media-types/">
    *      http://www.iana.org/assignments/media-types/</a>
    */
@@ -258,13 +261,12 @@ public final class Content implements Wr
     }
     Options opts = new Options();
     Configuration conf = NutchConfiguration.create();
-    
-    GenericOptionsParser parser =
-      new GenericOptionsParser(conf, opts, argv);
-    
+
+    GenericOptionsParser parser = new GenericOptionsParser(conf, opts, argv);
+
     String[] remainingArgs = parser.getRemainingArgs();
     FileSystem fs = FileSystem.get(conf);
-    
+
     try {
       int recno = Integer.parseInt(remainingArgs[0]);
       String segment = remainingArgs[1];

Modified: nutch/trunk/src/java/org/apache/nutch/protocol/Protocol.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/protocol/Protocol.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/protocol/Protocol.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/protocol/Protocol.java Thu Jan 29 
05:38:59 2015
@@ -27,12 +27,11 @@ import org.apache.nutch.plugin.Pluggable
 
 import crawlercommons.robots.BaseRobotRules;
 
-
 /** A retriever of url content. Implemented by protocol extensions. */
 public interface Protocol extends Pluggable, Configurable {
   /** The name of the extension point. */
   public final static String X_POINT_ID = Protocol.class.getName();
-  
+
   /**
    * Property name. If in the current configuration this property is set to
    * true, protocol implementations should handle "politeness" limits
@@ -51,16 +50,19 @@ public interface Protocol extends Plugga
    */
   public final static String CHECK_ROBOTS = "protocol.plugin.check.robots";
 
-  /** Returns the {@link Content} for a fetchlist entry.
+  /**
+   * Returns the {@link Content} for a fetchlist entry.
    */
   ProtocolOutput getProtocolOutput(Text url, CrawlDatum datum);
 
   /**
    * Retrieve robot rules applicable for this url.
-   * @param url url to check
-   * @param datum page datum
+   * 
+   * @param url
+   *          url to check
+   * @param datum
+   *          page datum
    * @return robot rules (specific for this url or default), never null
    */
   BaseRobotRules getRobotRules(Text url, CrawlDatum datum);
 }
-

Modified: nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolFactory.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolFactory.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolFactory.java 
(original)
+++ nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolFactory.java Thu Jan 
29 05:38:59 2015
@@ -37,7 +37,8 @@ import org.apache.hadoop.conf.Configurat
  */
 public class ProtocolFactory {
 
-  public static final Logger LOG = LoggerFactory.getLogger(ProtocolFactory.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(ProtocolFactory.class);
 
   private ExtensionPoint extensionPoint;
 

Modified: nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolNotFound.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolNotFound.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolNotFound.java 
(original)
+++ nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolNotFound.java Thu 
Jan 29 05:38:59 2015
@@ -22,7 +22,7 @@ public class ProtocolNotFound extends Pr
   private String url;
 
   public ProtocolNotFound(String url) {
-    this(url, "protocol not found for url="+url);
+    this(url, "protocol not found for url=" + url);
   }
 
   public ProtocolNotFound(String url, String message) {
@@ -30,5 +30,7 @@ public class ProtocolNotFound extends Pr
     this.url = url;
   }
 
-  public String getUrl() { return url; }
+  public String getUrl() {
+    return url;
+  }
 }

Modified: nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolOutput.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolOutput.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolOutput.java 
(original)
+++ nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolOutput.java Thu Jan 
29 05:38:59 2015
@@ -18,8 +18,9 @@
 package org.apache.nutch.protocol;
 
 /**
- * Simple aggregate to pass from protocol plugins both content and
- * protocol status.
+ * Simple aggregate to pass from protocol plugins both content and protocol
+ * status.
+ * 
  * @author Andrzej Bialecki &lt;[email protected]&gt;
  */
 public class ProtocolOutput {
@@ -30,12 +31,12 @@ public class ProtocolOutput {
     this.content = content;
     this.status = status;
   }
-  
+
   public ProtocolOutput(Content content) {
     this.content = content;
     this.status = ProtocolStatus.STATUS_SUCCESS;
   }
-  
+
   public Content getContent() {
     return content;
   }

Modified: nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolStatus.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolStatus.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolStatus.java 
(original)
+++ nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolStatus.java Thu Jan 
29 05:38:59 2015
@@ -30,65 +30,76 @@ import org.apache.hadoop.io.WritableUtil
  * @author Andrzej Bialecki
  */
 public class ProtocolStatus implements Writable {
-  
+
   private final static byte VERSION = 2;
-  
+
   /** Content was retrieved without errors. */
-  public static final int SUCCESS              = 1;
+  public static final int SUCCESS = 1;
   /** Content was not retrieved. Any further errors may be indicated in args. */
-  public static final int FAILED               = 2;
-  
-  /** This protocol was not found.  Application may attempt to retry later. */
-  public static final int PROTO_NOT_FOUND      = 10;
+  public static final int FAILED = 2;
+
+  /** This protocol was not found. Application may attempt to retry later. */
+  public static final int PROTO_NOT_FOUND = 10;
   /** Resource is gone. */
-  public static final int GONE                 = 11;
+  public static final int GONE = 11;
   /** Resource has moved permanently. New url should be found in args. */
-  public static final int MOVED                = 12;
+  public static final int MOVED = 12;
   /** Resource has moved temporarily. New url should be found in args. */
-  public static final int TEMP_MOVED           = 13;
+  public static final int TEMP_MOVED = 13;
   /** Resource was not found. */
-  public static final int NOTFOUND             = 14;
+  public static final int NOTFOUND = 14;
   /** Temporary failure. Application may retry immediately. */
-  public static final int RETRY                = 15;
-  /** Unspecified exception occured. Further information may be provided in args. */
-  public static final int EXCEPTION            = 16;
+  public static final int RETRY = 15;
+  /**
+   * Unspecified exception occured. Further information may be provided in args.
+   */
+  public static final int EXCEPTION = 16;
   /** Access denied - authorization required, but missing/incorrect. */
-  public static final int ACCESS_DENIED        = 17;
+  public static final int ACCESS_DENIED = 17;
   /** Access denied by robots.txt rules. */
-  public static final int ROBOTS_DENIED        = 18;
+  public static final int ROBOTS_DENIED = 18;
   /** Too many redirects. */
-  public static final int REDIR_EXCEEDED       = 19;
+  public static final int REDIR_EXCEEDED = 19;
   /** Not fetching. */
-  public static final int NOTFETCHING          = 20;
+  public static final int NOTFETCHING = 20;
   /** Unchanged since the last fetch. */
-  public static final int NOTMODIFIED          = 21;
-  /** Request was refused by protocol plugins, because it would block.
-   * The expected number of milliseconds to wait before retry may be provided
-   * in args. */
-  public static final int WOULDBLOCK           = 22;
+  public static final int NOTMODIFIED = 21;
+  /**
+   * Request was refused by protocol plugins, because it would block. The
+   * expected number of milliseconds to wait before retry may be provided in
+   * args.
+   */
+  public static final int WOULDBLOCK = 22;
   /** Thread was blocked http.max.delays times during fetching. */
-  public static final int BLOCKED              = 23;
-   
+  public static final int BLOCKED = 23;
+
   // Useful static instances for status codes that don't usually require any
   // additional arguments.
-  public static final ProtocolStatus STATUS_SUCCESS = new ProtocolStatus(SUCCESS);
+  public static final ProtocolStatus STATUS_SUCCESS = new ProtocolStatus(
+      SUCCESS);
   public static final ProtocolStatus STATUS_FAILED = new ProtocolStatus(FAILED);
   public static final ProtocolStatus STATUS_GONE = new ProtocolStatus(GONE);
-  public static final ProtocolStatus STATUS_NOTFOUND = new ProtocolStatus(NOTFOUND);
+  public static final ProtocolStatus STATUS_NOTFOUND = new ProtocolStatus(
+      NOTFOUND);
   public static final ProtocolStatus STATUS_RETRY = new ProtocolStatus(RETRY);
-  public static final ProtocolStatus STATUS_ROBOTS_DENIED = new ProtocolStatus(ROBOTS_DENIED);
-  public static final ProtocolStatus STATUS_REDIR_EXCEEDED = new ProtocolStatus(REDIR_EXCEEDED);
-  public static final ProtocolStatus STATUS_NOTFETCHING = new ProtocolStatus(NOTFETCHING);
-  public static final ProtocolStatus STATUS_NOTMODIFIED = new ProtocolStatus(NOTMODIFIED);
-  public static final ProtocolStatus STATUS_WOULDBLOCK = new ProtocolStatus(WOULDBLOCK);
-  public static final ProtocolStatus STATUS_BLOCKED = new ProtocolStatus(BLOCKED);
-  
+  public static final ProtocolStatus STATUS_ROBOTS_DENIED = new ProtocolStatus(
+      ROBOTS_DENIED);
+  public static final ProtocolStatus STATUS_REDIR_EXCEEDED = new ProtocolStatus(
+      REDIR_EXCEEDED);
+  public static final ProtocolStatus STATUS_NOTFETCHING = new ProtocolStatus(
+      NOTFETCHING);
+  public static final ProtocolStatus STATUS_NOTMODIFIED = new ProtocolStatus(
+      NOTMODIFIED);
+  public static final ProtocolStatus STATUS_WOULDBLOCK = new ProtocolStatus(
+      WOULDBLOCK);
+  public static final ProtocolStatus STATUS_BLOCKED = new ProtocolStatus(
+      BLOCKED);
+
   private int code;
   private long lastModified;
   private String[] args;
-  
-  private static final HashMap<Integer, String> codeToName =
-    new HashMap<Integer, String>();
+
+  private static final HashMap<Integer, String> codeToName = new HashMap<Integer, String>();
   static {
     codeToName.put(new Integer(SUCCESS), "success");
     codeToName.put(new Integer(FAILED), "failed");
@@ -107,40 +118,41 @@ public class ProtocolStatus implements W
     codeToName.put(new Integer(WOULDBLOCK), "wouldblock");
     codeToName.put(new Integer(BLOCKED), "blocked");
   }
-  
+
   public ProtocolStatus() {
-    
+
   }
 
   public ProtocolStatus(int code, String[] args) {
     this.code = code;
     this.args = args;
   }
-  
+
   public ProtocolStatus(int code, String[] args, long lastModified) {
     this.code = code;
     this.args = args;
     this.lastModified = lastModified;
   }
-  
+
   public ProtocolStatus(int code) {
     this(code, null);
   }
-  
+
   public ProtocolStatus(int code, long lastModified) {
     this(code, null, lastModified);
   }
-  
+
   public ProtocolStatus(int code, Object message) {
     this(code, message, 0L);
   }
-  
+
   public ProtocolStatus(int code, Object message, long lastModified) {
     this.code = code;
     this.lastModified = lastModified;
-    if (message != null) this.args = new String[]{String.valueOf(message)};
+    if (message != null)
+      this.args = new String[] { String.valueOf(message) };
   }
-  
+
   public ProtocolStatus(Throwable t) {
     this(EXCEPTION, t);
   }
@@ -150,10 +162,10 @@ public class ProtocolStatus implements W
     res.readFields(in);
     return res;
   }
-  
+
   public void readFields(DataInput in) throws IOException {
     byte version = in.readByte();
-    switch(version) {
+    switch (version) {
     case 1:
       code = in.readByte();
       lastModified = in.readLong();
@@ -168,10 +180,10 @@ public class ProtocolStatus implements W
       throw new VersionMismatchException(VERSION, version);
     }
   }
-  
+
   public void write(DataOutput out) throws IOException {
     out.writeByte(VERSION);
-    out.writeByte((byte)code);
+    out.writeByte((byte) code);
     out.writeLong(lastModified);
     if (args == null) {
       out.writeInt(-1);
@@ -183,7 +195,7 @@ public class ProtocolStatus implements W
   public void setArgs(String[] args) {
     this.args = args;
   }
-  
+
   public String[] getArgs() {
     return args;
   }
@@ -195,74 +207,77 @@ public class ProtocolStatus implements W
   public String getName() {
     return codeToName.get(this.code);
   }
-  
+
   public void setCode(int code) {
     this.code = code;
   }
-  
+
   public boolean isSuccess() {
-    return code == SUCCESS; 
+    return code == SUCCESS;
   }
-  
+
   public boolean isTransientFailure() {
-    return
-        code == ACCESS_DENIED ||
-        code == EXCEPTION ||
-        code == REDIR_EXCEEDED ||
-        code == RETRY ||
-        code == TEMP_MOVED ||
-        code == WOULDBLOCK ||
-        code == PROTO_NOT_FOUND; 
+    return code == ACCESS_DENIED || code == EXCEPTION || code == REDIR_EXCEEDED
+        || code == RETRY || code == TEMP_MOVED || code == WOULDBLOCK
+        || code == PROTO_NOT_FOUND;
   }
-  
+
   public boolean isPermanentFailure() {
-    return
-        code == FAILED ||
-        code == GONE ||
-        code == MOVED ||
-        code == NOTFOUND ||
-        code == ROBOTS_DENIED;
+    return code == FAILED || code == GONE || code == MOVED || code == NOTFOUND
+        || code == ROBOTS_DENIED;
   }
-  
+
   public String getMessage() {
-    if (args != null && args.length > 0) return args[0];
+    if (args != null && args.length > 0)
+      return args[0];
     return null;
   }
-  
+
   public void setMessage(String msg) {
-    if (args != null && args.length > 0) args[0] = msg;
-    else args = new String[] {msg};
+    if (args != null && args.length > 0)
+      args[0] = msg;
+    else
+      args = new String[] { msg };
   }
-  
+
   public long getLastModified() {
     return lastModified;
   }
-  
+
   public void setLastModified(long lastModified) {
     this.lastModified = lastModified;
   }
-  
+
   public boolean equals(Object o) {
-    if (o == null) return false;
-    if (!(o instanceof ProtocolStatus)) return false;
-    ProtocolStatus other = (ProtocolStatus)o;
-    if (this.code != other.code || this.lastModified != other.lastModified) return false;
+    if (o == null)
+      return false;
+    if (!(o instanceof ProtocolStatus))
+      return false;
+    ProtocolStatus other = (ProtocolStatus) o;
+    if (this.code != other.code || this.lastModified != other.lastModified)
+      return false;
     if (this.args == null) {
-      if (other.args == null) return true;
-      else return false;
+      if (other.args == null)
+        return true;
+      else
+        return false;
     } else {
-      if (other.args == null) return false;
-      if (other.args.length != this.args.length) return false;
+      if (other.args == null)
+        return false;
+      if (other.args.length != this.args.length)
+        return false;
       for (int i = 0; i < this.args.length; i++) {
-        if (!this.args[i].equals(other.args[i])) return false;
+        if (!this.args[i].equals(other.args[i]))
+          return false;
       }
     }
     return true;
   }
-  
+
   public String toString() {
     StringBuffer res = new StringBuffer();
-    res.append(codeToName.get(new Integer(code)) + "(" + code + "), lastModified=" + lastModified);
+    res.append(codeToName.get(new Integer(code)) + "(" + code
+        + "), lastModified=" + lastModified);
     if (args != null) {
       if (args.length == 1) {
         res.append(": " + String.valueOf(args[0]));

Modified: nutch/trunk/src/java/org/apache/nutch/protocol/RobotRules.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/protocol/RobotRules.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/protocol/RobotRules.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/protocol/RobotRules.java Thu Jan 29 
05:38:59 2015
@@ -35,9 +35,8 @@ public interface RobotRules {
   public long getCrawlDelay();
 
   /**
-   * Returns <code>false</code> if the <code>robots.txt</code> file
-   * prohibits us from accessing the given <code>url</code>, or
-   * <code>true</code> otherwise.
+   * Returns <code>false</code> if the <code>robots.txt</code> file prohibits us
+   * from accessing the given <code>url</code>, or <code>true</code> otherwise.
    */
   public boolean isAllowed(URL url);
 

Modified: nutch/trunk/src/java/org/apache/nutch/protocol/RobotRulesParser.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/protocol/RobotRulesParser.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/protocol/RobotRulesParser.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/protocol/RobotRulesParser.java Thu Jan 29 05:38:59 2015
@@ -43,35 +43,38 @@ import crawlercommons.robots.SimpleRobot
 import crawlercommons.robots.SimpleRobotRulesParser;
 
 /**
- * This class uses crawler-commons for handling the parsing of {@code 
robots.txt} files.
- * It emits SimpleRobotRules objects, which describe the download permissions
- * as described in SimpleRobotRulesParser.
+ * This class uses crawler-commons for handling the parsing of
+ * {@code robots.txt} files. It emits SimpleRobotRules objects, which describe
+ * the download permissions as described in SimpleRobotRulesParser.
  */
 public abstract class RobotRulesParser implements Configurable {
 
-  public static final Logger LOG = 
LoggerFactory.getLogger(RobotRulesParser.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(RobotRulesParser.class);
 
-  protected static final Hashtable<String, BaseRobotRules> CACHE = new 
Hashtable<String, BaseRobotRules> ();
+  protected static final Hashtable<String, BaseRobotRules> CACHE = new Hashtable<String, BaseRobotRules>();
 
   /**
-   *  A {@link BaseRobotRules} object appropriate for use
-   *  when the {@code robots.txt} file is empty or missing;
-   *  all requests are allowed.
+   * A {@link BaseRobotRules} object appropriate for use when the
+   * {@code robots.txt} file is empty or missing; all requests are allowed.
    */
-  public static final BaseRobotRules EMPTY_RULES = new 
SimpleRobotRules(RobotRulesMode.ALLOW_ALL);
+  public static final BaseRobotRules EMPTY_RULES = new SimpleRobotRules(
+      RobotRulesMode.ALLOW_ALL);
 
   /**
-   *  A {@link BaseRobotRules} object appropriate for use when the 
-   *  {@code robots.txt} file is not fetched due to a {@code 403/Forbidden}
-   *  response; all requests are disallowed. 
+   * A {@link BaseRobotRules} object appropriate for use when the
+   * {@code robots.txt} file is not fetched due to a {@code 403/Forbidden}
+   * response; all requests are disallowed.
    */
-  public static BaseRobotRules FORBID_ALL_RULES = new 
SimpleRobotRules(RobotRulesMode.ALLOW_NONE);
+  public static BaseRobotRules FORBID_ALL_RULES = new SimpleRobotRules(
+      RobotRulesMode.ALLOW_NONE);
 
   private static SimpleRobotRulesParser robotParser = new SimpleRobotRulesParser();
   private Configuration conf;
   protected String agentNames;
 
-  public RobotRulesParser() { }
+  public RobotRulesParser() {
+  }
 
   public RobotRulesParser(Configuration conf) {
     setConf(conf);
@@ -90,16 +93,18 @@ public abstract class RobotRulesParser i
     }
     agentNames = agentName;
 
-    // If there are any other agents specified, append those to the list of 
agents
+    // If there are any other agents specified, append those to the list of
+    // agents
     String otherAgents = conf.get("http.robots.agents");
-    if(otherAgents != null && !otherAgents.trim().isEmpty()) {
+    if (otherAgents != null && !otherAgents.trim().isEmpty()) {
       StringTokenizer tok = new StringTokenizer(otherAgents, ",");
       StringBuilder sb = new StringBuilder(agentNames);
       while (tok.hasMoreTokens()) {
         String str = tok.nextToken().trim();
         if (str.equals("*") || str.equals(agentName)) {
           // skip wildcard "*" or agent name itself
-          // (required for backward compatibility, cf. NUTCH-1715 and 
NUTCH-1718)
+          // (required for backward compatibility, cf. NUTCH-1715 and
+          // NUTCH-1718)
         } else {
           sb.append(",").append(str);
         }
@@ -117,16 +122,23 @@ public abstract class RobotRulesParser i
   }
 
   /**
-   * Parses the robots content using the {@link SimpleRobotRulesParser} from 
crawler commons
-   *    
-   * @param url A string containing url
-   * @param content Contents of the robots file in a byte array 
-   * @param contentType The content type of the robots file
-   * @param robotName A string containing all the robots agent names used by 
parser for matching
-   * @return BaseRobotRules object 
+   * Parses the robots content using the {@link SimpleRobotRulesParser} from
+   * crawler commons
+   * 
+   * @param url
+   *          A string containing url
+   * @param content
+   *          Contents of the robots file in a byte array
+   * @param contentType
+   *          The content type of the robots file
+   * @param robotName
+   *          A string containing all the robots agent names used by parser for
+   *          matching
+   * @return BaseRobotRules object
    */
-  public BaseRobotRules parseRules (String url, byte[] content, String 
contentType, String robotName) {
-    return robotParser.parseContent(url, content, contentType, robotName); 
+  public BaseRobotRules parseRules(String url, byte[] content,
+      String contentType, String robotName) {
+    return robotParser.parseContent(url, content, contentType, robotName);
   }
 
   public BaseRobotRules getRobotRulesSet(Protocol protocol, Text url) {
@@ -145,23 +157,30 @@ public abstract class RobotRulesParser i
   public static void main(String[] argv) {
 
     if (argv.length != 3) {
-      System.err.println("Usage: RobotRulesParser <robots-file> <url-file> 
<agent-names>\n");
-      System.err.println("\tThe <robots-file> will be parsed as a robots.txt 
file,");
-      System.err.println("\tusing the given <agent-name> to select rules.  
URLs ");
-      System.err.println("\twill be read (one per line) from <url-file>, and 
tested");
-      System.err.println("\tagainst the rules. Multiple agent names can be 
provided using");
+      System.err
+          .println("Usage: RobotRulesParser <robots-file> <url-file> <agent-names>\n");
+      System.err
+          .println("\tThe <robots-file> will be parsed as a robots.txt file,");
+      System.err
+          .println("\tusing the given <agent-name> to select rules.  URLs ");
+      System.err
+          .println("\twill be read (one per line) from <url-file>, and tested");
+      System.err
+          .println("\tagainst the rules. Multiple agent names can be provided using");
       System.err.println("\tcomma as a delimiter without any spaces.");
       System.exit(-1);
     }
 
     try {
       byte[] robotsBytes = Files.toByteArray(new File(argv[0]));
-      BaseRobotRules rules = robotParser.parseContent(argv[0], robotsBytes, 
"text/plain", argv[2]);
+      BaseRobotRules rules = robotParser.parseContent(argv[0], robotsBytes,
+          "text/plain", argv[2]);
 
       LineNumberReader testsIn = new LineNumberReader(new FileReader(argv[1]));
       String testPath = testsIn.readLine().trim();
       while (testPath != null) {
-        System.out.println( (rules.isAllowed(testPath) ? "allowed" : "not 
allowed") + ":\t" + testPath);
+        System.out.println((rules.isAllowed(testPath) ? "allowed"
+            : "not allowed") + ":\t" + testPath);
         testPath = testsIn.readLine();
       }
       testsIn.close();

Modified: nutch/trunk/src/java/org/apache/nutch/protocol/package-info.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/protocol/package-info.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/protocol/package-info.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/protocol/package-info.java Thu Jan 29 05:38:59 2015
@@ -20,3 +20,4 @@
  * see also {@link org.apache.nutch.net.protocols}.
  */
 package org.apache.nutch.protocol;
+

Modified: 
nutch/trunk/src/java/org/apache/nutch/scoring/AbstractScoringFilter.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/scoring/AbstractScoringFilter.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/scoring/AbstractScoringFilter.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/scoring/AbstractScoringFilter.java Thu Jan 29 05:38:59 2015
@@ -17,52 +17,52 @@ import org.apache.nutch.scoring.ScoringF
 
 public abstract class AbstractScoringFilter implements ScoringFilter {
 
-       private Configuration conf;
+  private Configuration conf;
 
-       public Configuration getConf() {
-               return conf;
-       }
-
-       public void setConf(Configuration conf) {
-               this.conf = conf;
-       }
-
-       public void injectedScore(Text url, CrawlDatum datum)
-                       throws ScoringFilterException {
-       }
-
-       public void initialScore(Text url, CrawlDatum datum)
-                       throws ScoringFilterException {
-       }
-
-       public float generatorSortValue(Text url, CrawlDatum datum, float 
initSort)
-                       throws ScoringFilterException {
-               return initSort;
-       }
-
-       public void passScoreBeforeParsing(Text url, CrawlDatum datum,
-                       Content content) throws ScoringFilterException {
-       }
-
-       public void passScoreAfterParsing(Text url, Content content, Parse 
parse)
-                       throws ScoringFilterException {
-       }
-
-       public CrawlDatum distributeScoreToOutlinks(Text fromUrl,
-                       ParseData parseData, Collection<Entry<Text, 
CrawlDatum>> targets,
-                       CrawlDatum adjust, int allCount) throws 
ScoringFilterException {
-               return adjust;
-       }
-
-       public void updateDbScore(Text url, CrawlDatum old, CrawlDatum datum,
-                       List<CrawlDatum> inlinked) throws 
ScoringFilterException {
-       }
-
-       @Override
-       public float indexerScore(Text url, NutchDocument doc, CrawlDatum 
dbDatum,
-                       CrawlDatum fetchDatum, Parse parse, Inlinks inlinks, 
float initScore)
-                       throws ScoringFilterException {
-               return initScore;
-       }
+  public Configuration getConf() {
+    return conf;
+  }
+
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+  }
+
+  public void injectedScore(Text url, CrawlDatum datum)
+      throws ScoringFilterException {
+  }
+
+  public void initialScore(Text url, CrawlDatum datum)
+      throws ScoringFilterException {
+  }
+
+  public float generatorSortValue(Text url, CrawlDatum datum, float initSort)
+      throws ScoringFilterException {
+    return initSort;
+  }
+
+  public void passScoreBeforeParsing(Text url, CrawlDatum datum, Content content)
+      throws ScoringFilterException {
+  }
+
+  public void passScoreAfterParsing(Text url, Content content, Parse parse)
+      throws ScoringFilterException {
+  }
+
+  public CrawlDatum distributeScoreToOutlinks(Text fromUrl,
+      ParseData parseData, Collection<Entry<Text, CrawlDatum>> targets,
+      CrawlDatum adjust, int allCount) throws ScoringFilterException {
+    return adjust;
+  }
+
+  public void updateDbScore(Text url, CrawlDatum old, CrawlDatum datum,
+      List<CrawlDatum> inlinked) throws ScoringFilterException {
+  }
+
+  @Override
+  public float indexerScore(Text url, NutchDocument doc, CrawlDatum dbDatum,
+      CrawlDatum fetchDatum, Parse parse, Inlinks inlinks, float initScore)
+      throws ScoringFilterException {
+    return initScore;
+  }
 
 }

Modified: nutch/trunk/src/java/org/apache/nutch/scoring/ScoringFilter.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/scoring/ScoringFilter.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/scoring/ScoringFilter.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/scoring/ScoringFilter.java Thu Jan 29 05:38:59 2015
@@ -33,129 +33,181 @@ import org.apache.nutch.protocol.Content
 /**
  * A contract defining behavior of scoring plugins.
  * 
- * A scoring filter will manipulate scoring variables in CrawlDatum and
- * in resulting search indexes. Filters can be chained in a specific order,
- * to provide multi-stage scoring adjustments.
+ * A scoring filter will manipulate scoring variables in CrawlDatum and in
+ * resulting search indexes. Filters can be chained in a specific order, to
+ * provide multi-stage scoring adjustments.
  * 
  * @author Andrzej Bialecki
  */
 public interface ScoringFilter extends Configurable, Pluggable {
   /** The name of the extension point. */
   public final static String X_POINT_ID = ScoringFilter.class.getName();
-  
+
   /**
    * Set an initial score for newly injected pages. Note: newly injected pages
-   * may have no inlinks, so filter implementations may wish to set this 
-   * score to a non-zero value, to give newly injected pages some initial
-   * credit.
-   * @param url url of the page
-   * @param datum new datum. Filters will modify it in-place.
+   * may have no inlinks, so filter implementations may wish to set this score
+   * to a non-zero value, to give newly injected pages some initial credit.
+   * 
+   * @param url
+   *          url of the page
+   * @param datum
+   *          new datum. Filters will modify it in-place.
    * @throws ScoringFilterException
    */
-  public void injectedScore(Text url, CrawlDatum datum) throws 
ScoringFilterException;
-  
+  public void injectedScore(Text url, CrawlDatum datum)
+      throws ScoringFilterException;
+
   /**
-   * Set an initial score for newly discovered pages. Note: newly discovered 
pages
-   * have at least one inlink with its score contribution, so filter 
implementations
-   * may choose to set initial score to zero (unknown value), and then the 
inlink
-   * score contribution will set the "real" value of the new page.
-   * @param url url of the page
-   * @param datum new datum. Filters will modify it in-place.
+   * Set an initial score for newly discovered pages. Note: newly discovered
+   * pages have at least one inlink with its score contribution, so filter
+   * implementations may choose to set initial score to zero (unknown value),
+   * and then the inlink score contribution will set the "real" value of the new
+   * page.
+   * 
+   * @param url
+   *          url of the page
+   * @param datum
+   *          new datum. Filters will modify it in-place.
    * @throws ScoringFilterException
    */
-  public void initialScore(Text url, CrawlDatum datum) throws 
ScoringFilterException;
-  
+  public void initialScore(Text url, CrawlDatum datum)
+      throws ScoringFilterException;
+
   /**
-   * This method prepares a sort value for the purpose of sorting and
-   * selecting top N scoring pages during fetchlist generation.
-   * @param url url of the page
-   * @param datum page's datum, should not be modified
-   * @param initSort initial sort value, or a value from previous filters in 
chain
+   * This method prepares a sort value for the purpose of sorting and selecting
+   * top N scoring pages during fetchlist generation.
+   * 
+   * @param url
+   *          url of the page
+   * @param datum
+   *          page's datum, should not be modified
+   * @param initSort
+   *          initial sort value, or a value from previous filters in chain
    */
-  public float generatorSortValue(Text url, CrawlDatum datum, float initSort) 
throws ScoringFilterException;
-  
+  public float generatorSortValue(Text url, CrawlDatum datum, float initSort)
+      throws ScoringFilterException;
+
   /**
    * This method takes all relevant score information from the current datum
    * (coming from a generated fetchlist) and stores it into
-   * {@link org.apache.nutch.protocol.Content} metadata.
-   * This is needed in order to pass this value(s) to the mechanism that 
distributes it
-   * to outlinked pages.
-   * @param url url of the page
-   * @param datum source datum. NOTE: modifications to this value are not 
persisted.
-   * @param content instance of content. Implementations may modify this
-   * in-place, primarily by setting some metadata properties.
+   * {@link org.apache.nutch.protocol.Content} metadata. This is needed in order
+   * to pass this value(s) to the mechanism that distributes it to outlinked
+   * pages.
+   * 
+   * @param url
+   *          url of the page
+   * @param datum
+   *          source datum. NOTE: modifications to this value are not persisted.
+   * @param content
+   *          instance of content. Implementations may modify this in-place,
+   *          primarily by setting some metadata properties.
    */
-  public void passScoreBeforeParsing(Text url, CrawlDatum datum, Content 
content) throws ScoringFilterException;
-  
+  public void passScoreBeforeParsing(Text url, CrawlDatum datum, Content content)
+      throws ScoringFilterException;
+
   /**
    * Currently a part of score distribution is performed using only data coming
    * from the parsing process. We need this method in order to ensure the
    * presence of score data in these steps.
-   * @param url page url
-   * @param content original content. NOTE: modifications to this value are 
not persisted.
-   * @param parse target instance to copy the score information to. 
Implementations
-   * may modify this in-place, primarily by setting some metadata properties.
+   * 
+   * @param url
+   *          page url
+   * @param content
+   *          original content. NOTE: modifications to this value are not
+   *          persisted.
+   * @param parse
+   *          target instance to copy the score information to. Implementations
+   *          may modify this in-place, primarily by setting some metadata
+   *          properties.
    */
-  public void passScoreAfterParsing(Text url, Content content, Parse parse) 
throws ScoringFilterException;
-  
+  public void passScoreAfterParsing(Text url, Content content, Parse parse)
+      throws ScoringFilterException;
+
   /**
    * Distribute score value from the current page to all its outlinked pages.
-   * @param fromUrl url of the source page
-   * @param parseData ParseData instance, which stores relevant score value(s)
-   * in its metadata. NOTE: filters may modify this in-place, all changes will
-   * be persisted.
-   * @param targets &lt;url, CrawlDatum&gt; pairs. NOTE: filters can modify 
this in-place,
-   * all changes will be persisted.
-   * @param adjust a CrawlDatum instance, initially null, which implementations
-   * may use to pass adjustment values to the original CrawlDatum. When 
creating
-   * this instance, set its status to {@link CrawlDatum#STATUS_LINKED}.
-   * @param allCount number of all collected outlinks from the source page
+   * 
+   * @param fromUrl
+   *          url of the source page
+   * @param parseData
+   *          ParseData instance, which stores relevant score value(s) in its
+   *          metadata. NOTE: filters may modify this in-place, all changes will
+   *          be persisted.
+   * @param targets
+   *          &lt;url, CrawlDatum&gt; pairs. NOTE: filters can modify this
+   *          in-place, all changes will be persisted.
+   * @param adjust
+   *          a CrawlDatum instance, initially null, which implementations may
+   *          use to pass adjustment values to the original CrawlDatum. When
+   *          creating this instance, set its status to
+   *          {@link CrawlDatum#STATUS_LINKED}.
+   * @param allCount
+   *          number of all collected outlinks from the source page
    * @return if needed, implementations may return an instance of CrawlDatum,
-   * with status {@link CrawlDatum#STATUS_LINKED}, which contains adjustments
-   * to be applied to the original CrawlDatum score(s) and metadata. This can
-   * be null if not needed.
+   *         with status {@link CrawlDatum#STATUS_LINKED}, which contains
+   *         adjustments to be applied to the original CrawlDatum score(s) and
+   *         metadata. This can be null if not needed.
    * @throws ScoringFilterException
    */
-  public CrawlDatum distributeScoreToOutlinks(Text fromUrl, ParseData 
parseData, 
-          Collection<Entry<Text, CrawlDatum>> targets, CrawlDatum adjust,
-          int allCount) throws ScoringFilterException;
-
-  /**
-   * This method calculates a new score of CrawlDatum during CrawlDb update, 
based on the
-   * initial value of the original CrawlDatum, and also score values 
contributed by
-   * inlinked pages.
-   * @param url url of the page
-   * @param old original datum, with original score. May be null if this is a 
newly
-   * discovered page. If not null, filters should use score values from this 
parameter
-   * as the starting values - the <code>datum</code> parameter may contain 
values that are
-   * no longer valid, if other updates occured between generation and this 
update.
-   * @param datum the new datum, with the original score saved at the time when
-   * fetchlist was generated. Filters should update this in-place, and it will 
be saved in
-   * the crawldb.
-   * @param inlinked (partial) list of CrawlDatum-s (with their scores) from
-   * links pointing to this page, found in the current update batch.
+  public CrawlDatum distributeScoreToOutlinks(Text fromUrl,
+      ParseData parseData, Collection<Entry<Text, CrawlDatum>> targets,
+      CrawlDatum adjust, int allCount) throws ScoringFilterException;
+
+  /**
+   * This method calculates a new score of CrawlDatum during CrawlDb update,
+   * based on the initial value of the original CrawlDatum, and also score
+   * values contributed by inlinked pages.
+   * 
+   * @param url
+   *          url of the page
+   * @param old
+   *          original datum, with original score. May be null if this is a
+   *          newly discovered page. If not null, filters should use score
+   *          values from this parameter as the starting values - the
+   *          <code>datum</code> parameter may contain values that are no longer
+   *          valid, if other updates occured between generation and this
+   *          update.
+   * @param datum
+   *          the new datum, with the original score saved at the time when
+   *          fetchlist was generated. Filters should update this in-place, and
+   *          it will be saved in the crawldb.
+   * @param inlinked
+   *          (partial) list of CrawlDatum-s (with their scores) from links
+   *          pointing to this page, found in the current update batch.
    * @throws ScoringFilterException
    */
-  public void updateDbScore(Text url, CrawlDatum old, CrawlDatum datum, 
List<CrawlDatum> inlinked) throws ScoringFilterException;
-  
+  public void updateDbScore(Text url, CrawlDatum old, CrawlDatum datum,
+      List<CrawlDatum> inlinked) throws ScoringFilterException;
+
   /**
    * This method calculates a Lucene document boost.
-   * @param url url of the page
-   * @param doc Lucene document. NOTE: this already contains all information 
collected
-   * by indexing filters. Implementations may modify this instance, in order 
to store/remove
-   * some information.
-   * @param dbDatum current page from CrawlDb. NOTE: changes made to this 
instance
-   * are not persisted.
-   * @param fetchDatum datum from FetcherOutput (containing among others the 
fetching status)
-   * @param parse parsing result. NOTE: changes made to this instance are not 
persisted.
-   * @param inlinks current inlinks from LinkDb. NOTE: changes made to this 
instance are
-   * not persisted.
-   * @param initScore initial boost value for the Lucene document.
-   * @return boost value for the Lucene document. This value is passed as an 
argument
-   * to the next scoring filter in chain. NOTE: implementations may also 
express
-   * other scoring strategies by modifying Lucene document directly.
+   * 
+   * @param url
+   *          url of the page
+   * @param doc
+   *          Lucene document. NOTE: this already contains all information
+   *          collected by indexing filters. Implementations may modify this
+   *          instance, in order to store/remove some information.
+   * @param dbDatum
+   *          current page from CrawlDb. NOTE: changes made to this instance are
+   *          not persisted.
+   * @param fetchDatum
+   *          datum from FetcherOutput (containing among others the fetching
+   *          status)
+   * @param parse
+   *          parsing result. NOTE: changes made to this instance are not
+   *          persisted.
+   * @param inlinks
+   *          current inlinks from LinkDb. NOTE: changes made to this instance
+   *          are not persisted.
+   * @param initScore
+   *          initial boost value for the Lucene document.
+   * @return boost value for the Lucene document. This value is passed as an
+   *         argument to the next scoring filter in chain. NOTE: implementations
+   *         may also express other scoring strategies by modifying Lucene
+   *         document directly.
    * @throws ScoringFilterException
    */
   public float indexerScore(Text url, NutchDocument doc, CrawlDatum dbDatum,
-          CrawlDatum fetchDatum, Parse parse, Inlinks inlinks, float 
initScore) throws ScoringFilterException;
+      CrawlDatum fetchDatum, Parse parse, Inlinks inlinks, float initScore)
+      throws ScoringFilterException;
 }

svn commit: r1655526 [8/26] - in /nutch/trunk: ./ src/java/org/apache/nutch/crawl/ src/java/org/apache/nutch/fetcher/ src/java/org/apache/nutch/indexer/ src/java/org/apache/nutch/metadata/ src/java/org/apache/nutch/net/ src/java/org/apache/nutch/net/pr...

Reply via email to