NUTCH-2089 Nutch 2.x is moved to compile on JDK 8

Project: http://git-wip-us.apache.org/repos/asf/nutch/repo
Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/0ea78907
Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/0ea78907
Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/0ea78907

Branch: refs/heads/2.x
Commit: 0ea78907dee6b07058b66a99e395aea8cf623e92
Parents: 7dcc5fa
Author: Furkan KAMACI <[email protected]>
Authored: Sun Sep 4 00:53:31 2016 +0300
Committer: Furkan KAMACI <[email protected]>
Committed: Sun Sep 4 01:38:13 2016 +0300

----------------------------------------------------------------------
 src/java/org/apache/nutch/api/NutchServer.java  | 23 +++---
 .../apache/nutch/api/impl/RAMConfManager.java   |  3 +
 .../nutch/crawl/AbstractFetchSchedule.java      | 23 +++---
 .../nutch/crawl/AdaptiveFetchSchedule.java      |  6 +-
 .../org/apache/nutch/crawl/FetchSchedule.java   | 25 ++----
 .../org/apache/nutch/crawl/GeneratorJob.java    | 20 ++++-
 .../apache/nutch/crawl/SignatureFactory.java    |  7 +-
 .../nutch/crawl/TextProfileSignature.java       |  2 +-
 .../org/apache/nutch/fetcher/FetcherJob.java    | 12 ++-
 .../org/apache/nutch/indexer/IndexUtil.java     |  4 +-
 .../org/apache/nutch/net/URLNormalizers.java    | 16 ++--
 .../apache/nutch/parse/NutchSitemapParse.java   |  4 +-
 .../apache/nutch/parse/ParsePluginsReader.java  |  6 +-
 src/java/org/apache/nutch/parse/Parser.java     |  2 +-
 .../org/apache/nutch/parse/ParserChecker.java   |  6 +-
 .../apache/nutch/plugin/PluginRepository.java   |  6 +-
 .../org/apache/nutch/scoring/ScoringFilter.java | 19 ++---
 .../org/apache/nutch/storage/StorageUtils.java  |  2 +-
 .../apache/nutch/tools/arc/ArcRecordReader.java | 20 ++---
 src/java/org/apache/nutch/util/Bytes.java       | 12 +--
 .../org/apache/nutch/util/EncodingDetector.java | 10 +--
 src/java/org/apache/nutch/util/MimeUtil.java    |  4 +-
 src/java/org/apache/nutch/util/NodeWalker.java  | 10 +--
 src/java/org/apache/nutch/util/NutchJob.java    |  4 +-
 src/java/org/apache/nutch/util/NutchTool.java   | 17 +++-
 .../apache/nutch/util/PrefixStringMatcher.java  |  8 +-
 .../apache/nutch/util/SuffixStringMatcher.java  |  8 +-
 src/java/org/apache/nutch/util/TableUtil.java   |  4 +-
 src/java/org/apache/nutch/util/TimingUtil.java  |  2 +-
 .../apache/nutch/util/TrieStringMatcher.java    |  8 +-
 src/java/org/apache/nutch/util/URLUtil.java     | 83 ++++++++++----------
 .../apache/nutch/util/domain/DomainSuffix.java  |  5 +-
 .../nutch/util/domain/TopLevelDomain.java       |  4 +-
 .../org/apache/nutch/parse/feed/FeedParser.java |  2 +-
 .../indexer/anchor/AnchorIndexingFilter.java    |  2 +-
 .../nutch/indexer/metadata/MetadataIndexer.java |  2 +-
 .../nutch/indexer/more/MoreIndexingFilter.java  |  4 +-
 .../nutch/analysis/lang/HTMLLanguageParser.java | 15 +++-
 .../nutch/urlfilter/api/RegexURLFilterBase.java | 12 +--
 .../org/apache/nutch/parse/html/DOMBuilder.java | 50 ++++++------
 .../nutch/parse/html/DOMContentUtils.java       |  2 +-
 .../parse/html/XMLCharacterRecognizer.java      |  2 +-
 .../apache/nutch/parse/js/JSParseFilter.java    |  6 +-
 .../org/apache/nutch/parse/swf/SWFParser.java   |  2 +-
 .../org/apache/nutch/parse/tika/DOMBuilder.java | 50 ++++++------
 .../parse/tika/XMLCharacterRecognizer.java      |  2 +-
 .../apache/nutch/parse/tika/TestRSSParser.java  |  2 +-
 .../org/apache/nutch/protocol/file/File.java    |  4 +-
 .../nutch/protocol/file/TestProtocolFile.java   |  2 +-
 .../org/apache/nutch/protocol/ftp/Client.java   |  2 +-
 .../java/org/apache/nutch/protocol/ftp/Ftp.java |  4 +-
 .../DummySSLProtocolSocketFactory.java          |  2 +-
 .../apache/nutch/protocol/httpclient/Http.java  |  4 +-
 .../httpclient/HttpBasicAuthentication.java     |  4 +-
 .../apache/nutch/scoring/link/package-info.java |  3 +-
 .../nutch/scoring/opic/OPICScoringFilter.java   |  2 +-
 .../nutch/collection/CollectionManager.java     |  2 +-
 .../nutch/urlfilter/domain/DomainURLFilter.java | 11 +--
 .../nutch/urlfilter/domain/package-info.java    |  2 -
 .../nutch/urlfilter/prefix/PrefixURLFilter.java |  2 +-
 .../nutch/urlfilter/suffix/SuffixURLFilter.java | 14 ++--
 .../nutch/urlfilter/validator/UrlValidator.java | 10 +--
 .../urlnormalizer/regex/RegexURLNormalizer.java |  4 +-
 .../nutch/api/AbstractNutchAPITestBase.java     |  2 +-
 .../org/apache/nutch/crawl/TestGenerator.java   |  2 +-
 .../org/apache/nutch/fetcher/TestFetcher.java   |  6 +-
 .../org/apache/nutch/util/CrawlTestUtil.java    |  7 +-
 67 files changed, 318 insertions(+), 309 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/api/NutchServer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/api/NutchServer.java 
b/src/java/org/apache/nutch/api/NutchServer.java
index 802bbef..5118497 100644
--- a/src/java/org/apache/nutch/api/NutchServer.java
+++ b/src/java/org/apache/nutch/api/NutchServer.java
@@ -98,9 +98,9 @@ public class NutchServer extends Application {
    * 'INFO' however best attempts should always be made to specify a logging
    * level.&lt;br&gt;
    * {@link org.apache.nutch.api.NutchServer} can be run as secure. 
restapi.auth property
-   * should be set to BASIC, DIGEST or SSL at 
&lt;code&gt;nutch-site.xml&lt;/code&gt; to enable HTTP basic authentication,
+   * should be set to BASIC, DIGEST or SSL at <code>nutch-site.xml</code> to 
enable HTTP basic authentication,
    * digest authentication or SSL when communicating with RESTAPI.
-   * Set restapi.auth.username and restapi.auth.password properties at 
&lt;code&gt;nutch-site.xml&lt;/code&gt; to configure
+   * Set restapi.auth.username and restapi.auth.password properties at 
<code>nutch-site.xml</code> to configure
    * credentials when BASIC or DIGEST authentication is used.
    * Set restapi.auth.ssl.storepath, restapi.auth.ssl.storepass and 
restapi.auth.ssl.keypass when SSL is used.
    *
@@ -117,12 +117,14 @@ public class NutchServer extends Application {
    * 'INFO' however best attempts should always be made to specify a logging
    * level.&lt;br&gt;
    * {@link org.apache.nutch.api.NutchServer} can be run as secure. 
restapi.auth property
-   * should be set to BASIC, DIGEST or SSL at 
&lt;code&gt;nutch-site.xml&lt;/code&gt; to enable HTTP basic authentication,
+   * should be set to BASIC, DIGEST or SSL at <code>nutch-site.xml</code> to 
enable HTTP basic authentication,
    * digest authentication or SSL when communicating with RESTAPI.
-   * Set restapi.auth.username and restapi.auth.password properties at 
&lt;code&gt;nutch-site.xml&lt;/code&gt; to configure
+   * Set restapi.auth.username and restapi.auth.password properties at 
<code>nutch-site.xml</code> to configure
    * credentials when BASIC or DIGEST authentication is used.
    * Set restapi.auth.ssl.storepath, restapi.auth.ssl.storepass and 
restapi.auth.ssl.keypass when SSL is used.
    *
+   * @param ramConfManager {@link RAMConfManager}
+   *
    * @see org.apache.nutch.api.security.AuthenticationTypeEnum
    */
   public NutchServer(RAMConfManager ramConfManager) {
@@ -137,12 +139,15 @@ public class NutchServer extends Application {
    * 'INFO' however best attempts should always be made to specify a logging
    * level.&lt;br&gt;
    * {@link org.apache.nutch.api.NutchServer} can be run as secure. 
restapi.auth property
-   * should be set to BASIC, DIGEST or SSL at 
&lt;code&gt;nutch-site.xml&lt;/code&gt; to enable HTTP basic authentication,
+   * should be set to BASIC, DIGEST or SSL at <code>nutch-site.xml</code> to 
enable HTTP basic authentication,
    * digest authentication or SSL when communicating with RESTAPI.
-   * Set restapi.auth.username and restapi.auth.password properties at 
&lt;code&gt;nutch-site.xml&lt;/code&gt; to configure
+   * Set restapi.auth.username and restapi.auth.password properties at 
<code>nutch-site.xml</code> to configure
    * credentials when BASIC or DIGEST authentication is used.
    * Set restapi.auth.ssl.storepath, restapi.auth.ssl.storepass and 
restapi.auth.ssl.keypass when SSL is used.
    *
+   * @param ramConfManager {@link RAMConfManager}
+   * @param confId active configuration id
+   *
    * @see org.apache.nutch.api.security.AuthenticationTypeEnum
    */
   public NutchServer(RAMConfManager ramConfManager, String confId) {
@@ -305,7 +310,7 @@ public class NutchServer extends Application {
   /**
    * Safety and convenience method to determine whether or not it is safe to
    * shut down the server. We make this assertion by consulting the
-   * {@link org.apache.nutch.api.NutchApp#jobManager} for a list of jobs with
+   * {@link #getJobMgr()}  for a list of jobs with
    * {@link org.apache.nutch.api.model.response.JobInfo#state} equal to
    * 'RUNNING'.
    * 
@@ -356,8 +361,8 @@ public class NutchServer extends Application {
   /**
    * Main method for NutchServer to run via command line.
    *
-   * @param args  arguments for log level, stopping the Server and port.
-   * @throws Exception
+   * @param args arguments for log level, stopping the Server and port.
+   * @throws Exception exception
    */
   public static void main(String[] args) throws Exception {
     CommandLineParser parser = new PosixParser();

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/api/impl/RAMConfManager.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/api/impl/RAMConfManager.java 
b/src/java/org/apache/nutch/api/impl/RAMConfManager.java
index 13c05fd..356a8bd 100644
--- a/src/java/org/apache/nutch/api/impl/RAMConfManager.java
+++ b/src/java/org/apache/nutch/api/impl/RAMConfManager.java
@@ -50,6 +50,9 @@ public class RAMConfManager implements ConfManager {
 
   /**
    * Public constructor which accepts a configuration id and {@link 
Configuration} type configuration.
+   *
+   * @param confId configuration id
+   * @param configuration configuration
    */
   public RAMConfManager(String confId, Configuration configuration) {
     configurations.put(confId, configuration);

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java 
b/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java
index 045f4cd..8070c7b 100755
--- a/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java
+++ b/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java
@@ -76,6 +76,7 @@ public abstract class AbstractFetchSchedule extends 
Configured implements
    * @param url
    *          URL of the page.
    * @param page
+   *          {@link WebPage} object relative to the URL
    */
   @Override
   public void initializeSchedule(String url, WebPage page) {
@@ -104,13 +105,7 @@ public abstract class AbstractFetchSchedule extends 
Configured implements
    * @param url
    *          URL of the page
    * @param page
-   * @return adjusted page information, including all original information.
-   *         NOTE: this may be a different instance than
-   * @param datum
-   *          , but implementations should make sure that it contains at least
-   *          all information from
-   * @param datum
-   *          .
+   *          {@link WebPage} object relative to the URL
    */
   @Override
   public void setPageGoneSchedule(String url, WebPage page, long prevFetchTime,
@@ -134,6 +129,7 @@ public abstract class AbstractFetchSchedule extends 
Configured implements
    * @param url
    *          URL of the page
    * @param page
+   *          {@link WebPage} object relative to the URL
    * @param prevFetchTime
    *          previous fetch time
    * @param prevModifiedTime
@@ -163,15 +159,15 @@ public abstract class AbstractFetchSchedule extends 
Configured implements
    * in the current fetchlist. NOTE: a true return value does not guarantee 
that
    * the page will be fetched, it just allows it to be included in the further
    * selection process based on scores. The default implementation checks
-   * <code>fetchTime</code>, if it is higher than the
-   * 
-   * @param curTime
-   *          it returns false, and true otherwise. It will also check that
-   *          fetchTime is not too remote (more than <code>maxInterval</code),
-   *          in which case it lowers the interval and returns true.
+   * <code>fetchTime</code>, if it is higher than the current time
+   * it returns false, and true otherwise. It will also check that
+   * fetchTime is not too remote (more than <code>maxInterval</code>),
+   * in which case it lowers the interval and returns true.
+   *
    * @param url
    *          URL of the page
    * @param page
+   *          {@link WebPage} object relative to the URL
    * @param curTime
    *          reference time (usually set to the time when the fetchlist
    *          generation process was started).
@@ -200,6 +196,7 @@ public abstract class AbstractFetchSchedule extends 
Configured implements
    * @param url
    *          URL of the page
    * @param page
+   *          {@link WebPage} object relative to the URL
    * @param asap
    *          if true, force refetch as soon as possible - this sets the
    *          fetchTime to now. If false, force refetch whenever the next fetch

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java 
b/src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java
index 1c2780a..30c6ec7 100755
--- a/src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java
+++ b/src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java
@@ -46,10 +46,8 @@ import org.apache.nutch.storage.WebPage;
  * <p>
  * NOTE: values of DEC_FACTOR and INC_FACTOR higher than 0.4f may destabilize
  * the algorithm, so that the fetch interval either increases or decreases
- * infinitely, with little relevance to the page changes. Please use
- * {@link #main(String[])} method to test the values before applying them in a
- * production system.
- * </p>
+ * infinitely, with little relevance to the page changes.
+ *
  * 
  * @author Andrzej Bialecki
  */

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/crawl/FetchSchedule.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/crawl/FetchSchedule.java 
b/src/java/org/apache/nutch/crawl/FetchSchedule.java
index eb896a6..8219a61 100755
--- a/src/java/org/apache/nutch/crawl/FetchSchedule.java
+++ b/src/java/org/apache/nutch/crawl/FetchSchedule.java
@@ -49,6 +49,7 @@ public interface FetchSchedule extends Configurable {
    * @param url
    *          URL of the page.
    * @param page
+   *          {@link WebPage} object relative to the URL
    */
   public void initializeSchedule(String url, WebPage page);
 
@@ -60,21 +61,16 @@ public interface FetchSchedule extends Configurable {
    * @param url
    *          url of the page
    * @param page
+   *          {@link WebPage} object relative to the URL
    * @param prevFetchTime
    *          previous value of fetch time, or -1 if not available
    * @param prevModifiedTime
    *          previous value of modifiedTime, or -1 if not available
    * @param fetchTime
-   *          the latest time, when the page was recently re-fetched. Most
-   *          FetchSchedule implementations should update the value in
-   * @param datum
-   *          to something greater than this value.
+   *          the latest time, when the page was recently re-fetched.
    * @param modifiedTime
    *          last time the content was modified. This information comes from
-   *          the protocol implementations, or is set to < 0 if not available.
-   *          Most FetchSchedule implementations should update the value in
-   * @param datum
-   *          to this value.
+   *          the protocol implementations, or is set to &lt; 0 if not 
available.
    * @param state
    *          if {@link #STATUS_MODIFIED}, then the content is considered to be
    *          "changed" before the <code>fetchTime</code>, if
@@ -90,13 +86,10 @@ public interface FetchSchedule extends Configurable {
 
   /**
    * This method specifies how to schedule refetching of pages marked as GONE.
-   * Default implementation increases fetchInterval by 50%, and if it exceeds
-   * the <code>maxInterval</code> it calls
-   * {@link #forceRefetch(Text, CrawlDatum, boolean)}.
-   * 
    * @param url
    *          URL of the page
    * @param page
+   *          {@link WebPage} object relative to the URL
    */
   public void setPageGoneSchedule(String url, WebPage page, long prevFetchTime,
       long prevModifiedTime, long fetchTime);
@@ -109,6 +102,7 @@ public interface FetchSchedule extends Configurable {
    * @param url
    *          URL of the page
    * @param page
+   *          {@link WebPage} object relative to the URL
    * @param prevFetchTime
    *          previous fetch time
    * @param prevModifiedTime
@@ -133,14 +127,8 @@ public interface FetchSchedule extends Configurable {
    * selection process based on scores. The default implementation checks
    * <code>fetchTime</code>, if it is higher than the
    * 
-   * @param curTime
-   *          it returns false, and true otherwise. It will also check that
-   *          fetchTime is not too remote (more than <code>maxInterval</code),
-   *          in which case it lowers the interval and returns true.
    * @param url
    *          URL of the page
-   * @param row
-   *          url's row
    * @param curTime
    *          reference time (usually set to the time when the fetchlist
    *          generation process was started).
@@ -155,7 +143,6 @@ public interface FetchSchedule extends Configurable {
    * 
    * @param url
    *          URL of the page
-   * @param page
    * @param asap
    *          if true, force refetch as soon as possible - this sets the
    *          fetchTime to now. If false, force refetch whenever the next fetch

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/crawl/GeneratorJob.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/crawl/GeneratorJob.java 
b/src/java/org/apache/nutch/crawl/GeneratorJob.java
index f47637f..1627590 100644
--- a/src/java/org/apache/nutch/crawl/GeneratorJob.java
+++ b/src/java/org/apache/nutch/crawl/GeneratorJob.java
@@ -165,7 +165,11 @@ public class GeneratorJob extends NutchTool implements 
Tool {
     return fields;
   }
 
-  /** Generate a random batch id */
+  /**
+   * Generates a random batch id
+   *
+   * @return random batch id
+   */
   public static String randomBatchId() {
     long curTime = System.currentTimeMillis();
     int randomSeed = Math.abs(new Random().nextInt());
@@ -173,6 +177,13 @@ public class GeneratorJob extends NutchTool implements 
Tool {
     return batchId;
   }
 
+  /**
+   * Runs generator
+   *
+   * @param args map of arguments
+   * @return results
+   * @throws Exception
+   */
   public Map<String, Object> run(Map<String, Object> args) throws Exception {
     String batchId = (String) args.get(Nutch.ARG_BATCH);
     if (batchId == null) {
@@ -290,6 +301,13 @@ public class GeneratorJob extends NutchTool implements 
Tool {
     return batchId;
   }
 
+  /**
+   * Runs generator from commandline
+   *
+   * @param args arguments
+   * @return returns -1
+   * @throws Exception
+   */
   public int run(String[] args) throws Exception {
     if (args.length <= 0) {
       System.out

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/crawl/SignatureFactory.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/crawl/SignatureFactory.java 
b/src/java/org/apache/nutch/crawl/SignatureFactory.java
index 1577634..8cf7471 100644
--- a/src/java/org/apache/nutch/crawl/SignatureFactory.java
+++ b/src/java/org/apache/nutch/crawl/SignatureFactory.java
@@ -40,7 +40,12 @@ public class SignatureFactory {
   private SignatureFactory() {
   } // no public ctor
 
-  /** Return the default Signature implementation. */
+  /**
+   * Returns the default {@link Signature} implementation
+   *
+   * @param conf configuration
+   * @return default {@link Signature} implementation
+   */
   public static Signature getSignature(Configuration conf) {
     String clazz = conf.get("db.signature.class", 
MD5Signature.class.getName());
     ObjectCache objectCache = ObjectCache.get(conf);

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/crawl/TextProfileSignature.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/crawl/TextProfileSignature.java 
b/src/java/org/apache/nutch/crawl/TextProfileSignature.java
index 6d7e5e0..f797b10 100644
--- a/src/java/org/apache/nutch/crawl/TextProfileSignature.java
+++ b/src/java/org/apache/nutch/crawl/TextProfileSignature.java
@@ -33,7 +33,7 @@ import org.apache.nutch.storage.WebPage;
  * An implementation of a page signature. It calculates an MD5 hash of a plain
  * text "profile" of a page. In case there is no text, it calculates a hash
  * using the {@link MD5Signature}.
- * </p>
+ *
  * <p>
  * The algorithm to calculate a page "profile" takes the plain text version of 
a
  * page and performs the following steps:

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/fetcher/FetcherJob.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/fetcher/FetcherJob.java 
b/src/java/org/apache/nutch/fetcher/FetcherJob.java
index a7f3df8..015c209 100644
--- a/src/java/org/apache/nutch/fetcher/FetcherJob.java
+++ b/src/java/org/apache/nutch/fetcher/FetcherJob.java
@@ -79,19 +79,17 @@ public class FetcherJob extends NutchTool implements Tool {
   /**
    * <p>
    * Mapper class for Fetcher.
-   * </p>
+   *
    * <p>
    * This class reads the random integer written by {@link GeneratorJob} as its
    * key while outputting the actual key and value arguments through a
    * {@link FetchEntry} instance.
-   * </p>
+   *
    * <p>
-   * This approach (combined with the use of {@link PartitionUrlByHost}) makes
-   * sure that Fetcher is still polite while also randomizing the key order. If
    * one host has a huge number of URLs in your table while other hosts have
    * not, {@link FetcherReducer} will not be stuck on one host but process URLs
    * from other hosts as well.
-   * </p>
+   *
    */
   public static class FetcherMapper extends
   GoraMapper<String, WebPage, IntWritable, FetchEntry> {
@@ -246,7 +244,7 @@ public class FetcherJob extends NutchTool implements Tool {
    *          number of threads per map task
    * @param shouldResume
    * @param numTasks
-   *          number of fetching tasks (reducers). If set to < 1 then use the
+   *          number of fetching tasks (reducers). If set to &lt; 1 then use 
the
    *          default, which is mapred.map.tasks.
    * @return 0 on success
    * @throws Exception
@@ -266,7 +264,7 @@ public class FetcherJob extends NutchTool implements Tool {
    *          number of threads per map task
    * @param shouldResume
    * @param numTasks
-   *          number of fetching tasks (reducers). If set to < 1 then use the
+   *          number of fetching tasks (reducers). If set to &lt; 1 then use 
the
    *          default, which is mapred.map.tasks.
    * @param stmDetect
    *          If set true, sitemap detection is run.

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/indexer/IndexUtil.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/indexer/IndexUtil.java 
b/src/java/org/apache/nutch/indexer/IndexUtil.java
index 6d12382..ddb6f0d 100644
--- a/src/java/org/apache/nutch/indexer/IndexUtil.java
+++ b/src/java/org/apache/nutch/indexer/IndexUtil.java
@@ -42,7 +42,7 @@ public class IndexUtil {
   }
 
   /**
-   * Index a {@link Webpage}, here we add the following fields:
+   * Index a {@link WebPage}, here we add the following fields:
    * <ol>
    * <li><tt>id</tt>: default uniqueKey for the {@link NutchDocument}.</li>
    * <li><tt>digest</tt>: Digest is used to identify pages (like unique ID) and
@@ -60,7 +60,7 @@ public class IndexUtil {
    * @param key
    *          The key of the page (reversed url).
    * @param page
-   *          The {@link Webpage}.
+   *          The {@link WebPage}.
    * @return The indexed document, or null if skipped by index filters.
    */
   public NutchDocument index(String key, WebPage page) {

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/net/URLNormalizers.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/net/URLNormalizers.java 
b/src/java/org/apache/nutch/net/URLNormalizers.java
index 03a0b85..1fc1df8 100644
--- a/src/java/org/apache/nutch/net/URLNormalizers.java
+++ b/src/java/org/apache/nutch/net/URLNormalizers.java
@@ -51,30 +51,30 @@ import org.apache.nutch.util.ObjectCache;
  * order). If there are more normalizers activated than explicitly named on 
this
  * list, the remaining ones will be run in random order after the ones 
specified
  * on the list are executed.
- * </p>
+ *
  * <p>
  * You can define a set of contexts (or scopes) in which normalizers may be
  * called. Each scope can have its own list of normalizers (defined in
- * "urlnormalizer.scope.<scope_name>" property) and its own order (defined in
- * "urlnormalizer.order.<scope_name>" property). If any of these properties are
+ * "urlnormalizer.scope.&lt;scope_name&gt;" property) and its own order 
(defined in
+ * "urlnormalizer.order.&lt;scope_name&gt;" property). If any of these 
properties are
  * missing, default settings are used for the global scope.
- * </p>
+ *
  * <p>
  * In case no normalizers are required for any given scope, a
  * <code>org.apache.nutch.net.urlnormalizer.pass.PassURLNormalizer</code> 
should
  * be used.
- * </p>
+ *
  * <p>
  * Each normalizer may further select among many configurations, depending on
  * the scope in which it is called, because the scope name is passed as a
  * parameter to each normalizer. You can also use the same normalizer for many
  * scopes.
- * </p>
+ *
  * <p>
  * Several scopes have been defined, and various Nutch tools will attempt using
  * scope-specific normalizers first (and fall back to default config if
  * scope-specific configuration is missing).
- * </p>
+ *
  * <p>
  * Normalizers may be run several times, to ensure that modifications 
introduced
  * by normalizers at the end of the list can be further reduced by normalizers
@@ -83,7 +83,7 @@ import org.apache.nutch.util.ObjectCache;
  * want to run this loop up to the number of activated normalizers. This loop
  * count can be configured through <tt>urlnormalizer.loop.count</tt> property.
  * As soon as the url is unchanged the loop will stop and return the result.
- * </p>
+ *
  * 
  * @author Andrzej Bialecki
  */

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/parse/NutchSitemapParse.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/parse/NutchSitemapParse.java 
b/src/java/org/apache/nutch/parse/NutchSitemapParse.java
index c0a9d9b..0e57339 100644
--- a/src/java/org/apache/nutch/parse/NutchSitemapParse.java
+++ b/src/java/org/apache/nutch/parse/NutchSitemapParse.java
@@ -6,9 +6,9 @@
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
- * <p/>
+ * <p>
  * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
+ * <p>
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/parse/ParsePluginsReader.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/parse/ParsePluginsReader.java 
b/src/java/org/apache/nutch/parse/ParsePluginsReader.java
index dddd025..b4c6f4e 100644
--- a/src/java/org/apache/nutch/parse/ParsePluginsReader.java
+++ b/src/java/org/apache/nutch/parse/ParsePluginsReader.java
@@ -69,12 +69,10 @@ public class ParsePluginsReader {
 
   /**
    * Reads the <code>parse-plugins.xml</code> file and returns the
-   * {@link #ParsePluginList} defined by it.
+   * {@link ParsePluginList} defined by it.
    * 
-   * @return A {@link #ParsePluginList} specified by the
+   * @return A {@link ParsePluginList} specified by the
    *         <code>parse-plugins.xml</code> file.
-   * @throws Exception
-   *           If any parsing error occurs.
    */
   public ParsePluginList parse(Configuration conf) {
 

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/parse/Parser.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/parse/Parser.java 
b/src/java/org/apache/nutch/parse/Parser.java
index b623fd0..9a8c2b7 100644
--- a/src/java/org/apache/nutch/parse/Parser.java
+++ b/src/java/org/apache/nutch/parse/Parser.java
@@ -34,7 +34,7 @@ public interface Parser extends FieldPluggable, Configurable {
   /**
    * <p>
    * This method parses content in WebPage instance
-   * </p>
+   *
    * 
    * @param url
    *          Page's URL

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/parse/ParserChecker.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/parse/ParserChecker.java 
b/src/java/org/apache/nutch/parse/ParserChecker.java
index 12faeae..4d5c572 100644
--- a/src/java/org/apache/nutch/parse/ParserChecker.java
+++ b/src/java/org/apache/nutch/parse/ParserChecker.java
@@ -48,13 +48,13 @@ import java.util.Map.Entry;
  * is used to remove duplicates during the dedup procedure. It is calculated
  * using {@link org.apache.nutch.crawl.MD5Signature} or
  * {@link org.apache.nutch.crawl.TextProfileSignature}.</li>
- * <li><tt>Version</tt>: From {@link org.apache.nutch.parse.ParseData}.</li>
- * <li><tt>Status</tt>: From {@link org.apache.nutch.parse.ParseData}.</li>
+ * <li><tt>Version</tt>: From org.apache.nutch.parse.ParseData.</li>
+ * <li><tt>Status</tt>: From org.apache.nutch.parse.ParseData.</li>
  * <li><tt>Title</tt>: of the URL</li>
  * <li><tt>Outlinks</tt>: associated with the URL</li>
  * <li><tt>Content Metadata</tt>: such as <i>X-AspNet-Version</i>, <i>Date</i>,
  * <i>Content-length</i>, <i>servedBy</i>, <i>Content-Type</i>,
- * <i>Cache-Control</>, etc.</li>
+ * <i>Cache-Control</i>, etc.</li>
  * <li><tt>Parse Metadata</tt>: such as <i>CharEncodingForConversion</i>,
  * <i>OriginalCharEncoding</i>, <i>language</i>, etc.</li>
  * <li><tt>ParseText</tt>: The page parse text which varies in length 
depending

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/plugin/PluginRepository.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/plugin/PluginRepository.java 
b/src/java/org/apache/nutch/plugin/PluginRepository.java
index 346ae31..6486f63 100644
--- a/src/java/org/apache/nutch/plugin/PluginRepository.java
+++ b/src/java/org/apache/nutch/plugin/PluginRepository.java
@@ -59,8 +59,10 @@ public class PluginRepository {
       .getLogger(PluginRepository.class);
 
   /**
-   * @throws PluginRuntimeException
-   * @see java.lang.Object#Object()
+   * Plugin repository constructor
+   *
+   * @param conf Configuration
+   * @throws RuntimeException
    */
   public PluginRepository(Configuration conf) throws RuntimeException {
     fActivatedPlugins = new HashMap<String, Plugin>();

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/scoring/ScoringFilter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/scoring/ScoringFilter.java 
b/src/java/org/apache/nutch/scoring/ScoringFilter.java
index 8c06ef6..17c6350 100644
--- a/src/java/org/apache/nutch/scoring/ScoringFilter.java
+++ b/src/java/org/apache/nutch/scoring/ScoringFilter.java
@@ -72,8 +72,8 @@ public interface ScoringFilter extends Configurable, 
FieldPluggable {
    * 
    * @param url
    *          url of the page
-   * @param datum
-   *          page row. Modifications will be persisted.
+   * @param page
+   *          {@link WebPage} object relative to the URL
    * @param initSort
    *          initial sort value, or a value from previous filters in chain
    */
@@ -85,13 +85,8 @@ public interface ScoringFilter extends Configurable, 
FieldPluggable {
    * 
    * @param fromUrl
    *          url of the source page
-   * @param row
-   *          page row
    * @param scoreData
-   *          A list of {@link OutlinkedScoreDatum}s for every outlink. These
-   *          {@link OutlinkedScoreDatum}s will be passed to
-   *          {@link #updateScore(String, OldWebTableRow, List)} for every
-   *          outlinked URL.
+   *          A list of {@link ScoreDatum}
    * @param allCount
    *          number of all collected outlinks from the source page
    * @throws ScoringFilterException
@@ -106,9 +101,9 @@ public interface ScoringFilter extends Configurable, 
FieldPluggable {
    * 
    * @param url
    *          url of the page
-   * @param page
-   * @param inlinked
-   *          list of {@link OutlinkedScoreDatum}s for all inlinks pointing to
+   * @param page {@link WebPage} object relative to the URL
+   * @param inlinkedScoreData
+   *          list of {@link ScoreDatum}s for all inlinks pointing to
    *          this URL.
    * @throws ScoringFilterException
    */
@@ -124,8 +119,6 @@ public interface ScoringFilter extends Configurable, 
FieldPluggable {
    *          document. NOTE: this already contains all information collected 
by
    *          indexing filters. Implementations may modify this instance, in
    *          order to store/remove some information.
-   * @param row
-   *          page row
    * @param initScore
    *          initial boost value for the Lucene document.
    * @return boost value for the Lucene document. This value is passed as an

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/storage/StorageUtils.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/storage/StorageUtils.java 
b/src/java/org/apache/nutch/storage/StorageUtils.java
index b68e8f8..e82a3c5 100644
--- a/src/java/org/apache/nutch/storage/StorageUtils.java
+++ b/src/java/org/apache/nutch/storage/StorageUtils.java
@@ -82,7 +82,7 @@ public class StorageUtils {
   /**
    * Return the Persistent Gora class used to persist Nutch Web data.
    * 
-   * @param the
+   * @param conf
    *          Nutch configuration
    * @return the Gora DataStore persistent class
    * @throws ClassNotFoundException

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java 
b/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java
index d3f9799..7f36b52 100644
--- a/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java
+++ b/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java
@@ -38,21 +38,21 @@ import org.apache.hadoop.util.StringUtils;
  * <p>
  * The <code>ArchRecordReader</code> class provides a record reader which reads
  * records from arc files.
- * </p>
+ *
  * 
  * <p>
  * Arc files are essentially tars of gzips. Each record in an arc file is a
  * compressed gzip. Multiple records are concatenated together to form a
- * complete arc. For more information on the arc file format see {@link http
- * ://www.archive.org/web/researcher/ArcFileFormat.php}.
- * </p>
- * 
+ * complete arc. For more information on the arc file format see
+ * <a href="http://www.archive.org/web/researcher/ArcFileFormat.php">
+ *   http://www.archive.org/web/researcher/ArcFileFormat.php</a>.
+ *
  * <p>
  * Arc files are used by the internet archive and grub projects.
- * </p>
+ *
  * 
- * @see http://www.archive.org/
- * @see http://www.grub.org/
+ * @see <a href="http://www.archive.org/">http://www.archive.org/</a>
+ * @see <a href="http://www.grub.org/">http://www.grub.org/</a>
  */
 public class ArcRecordReader implements RecordReader<Text, BytesWritable> {
 
@@ -72,7 +72,7 @@ public class ArcRecordReader implements RecordReader<Text, 
BytesWritable> {
   /**
    * <p>
    * Returns true if the byte array passed matches the gzip header magic 
number.
-   * </p>
+   *
    * 
    * @param input
    *          The byte array to check.
@@ -174,7 +174,7 @@ public class ArcRecordReader implements RecordReader<Text, 
BytesWritable> {
    * Returns true if the next record in the split is read into the key and 
value
    * pair. The key will be the arc record header and the values will be the raw
    * content bytes of the arc record.
-   * </p>
+   *
    * 
    * @param key
    *          The record key

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/util/Bytes.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/util/Bytes.java 
b/src/java/org/apache/nutch/util/Bytes.java
index db9f468..043a897 100644
--- a/src/java/org/apache/nutch/util/Bytes.java
+++ b/src/java/org/apache/nutch/util/Bytes.java
@@ -980,7 +980,7 @@ public class Bytes {
    *          left operand
    * @param right
    *          right operand
-   * @return 0 if equal, < 0 if left is less than right, etc.
+   * @return 0 if equal, &lt; 0 if left is less than right, etc.
    */
   public static int compareTo(final byte[] left, final byte[] right) {
     return compareTo(left, 0, left.length, right, 0, right.length);
@@ -1001,7 +1001,7 @@ public class Bytes {
    *          How much to compare from the left buffer
    * @param length2
    *          How much to compare from the right buffer
-   * @return 0 if equal, < 0 if left is less than right, etc.
+   * @return 0 if equal, &lt; 0 if left is less than right, etc.
    */
   public static int compareTo(byte[] buffer1, int offset1, int length1,
       byte[] buffer2, int offset2, int length2) {
@@ -1050,7 +1050,7 @@ public class Bytes {
    * @return Runs {@link WritableComparator#hashBytes(byte[], int)} on the
    *         passed in array. This method is what
    *         {@link org.apache.hadoop.io.Text} and
-   *         {@link ImmutableBytesWritable} use calculating hash code.
+   *         org.apache.hadoop.hbase.io.ImmutableBytesWritable use calculating 
hash code.
    */
   public static int hashCode(final byte[] b) {
     return hashCode(b, b.length);
@@ -1064,7 +1064,7 @@ public class Bytes {
    * @return Runs {@link WritableComparator#hashBytes(byte[], int)} on the
    *         passed in array. This method is what
    *         {@link org.apache.hadoop.io.Text} and
-   *         {@link ImmutableBytesWritable} use calculating hash code.
+   *         org.apache.hadoop.hbase.io.ImmutableBytesWritable use calculating 
hash code.
    */
   public static int hashCode(final byte[] b, final int length) {
     return WritableComparator.hashBytes(b, length);
@@ -1366,12 +1366,12 @@ public class Bytes {
    * given amount.
    * 
    * @param value
-   *          - array of bytes containing long (length <= SIZEOF_LONG)
+   *          - array of bytes containing long (length &lt;= SIZEOF_LONG)
    * @param amount
    *          value will be incremented on (deincremented if negative)
    * @return array of bytes containing incremented long (length == SIZEOF_LONG)
    * @throws IOException
-   *           - if value.length > SIZEOF_LONG
+   *           - if value.length &gt; SIZEOF_LONG
    */
   public static byte[] incrementBytes(byte[] value, long amount)
       throws IOException {

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/util/EncodingDetector.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/util/EncodingDetector.java 
b/src/java/org/apache/nutch/util/EncodingDetector.java
index 5b40e29..25f8eef 100644
--- a/src/java/org/apache/nutch/util/EncodingDetector.java
+++ b/src/java/org/apache/nutch/util/EncodingDetector.java
@@ -45,7 +45,7 @@ import java.util.Locale;
  * <li>Taking a set of clues and making a "best guess" as to the "real"
  * encoding.</li>
  * </ol>
- * </p>
+ *
  * 
  * <p>
  * A caller will often have some extra information about what the encoding 
might
@@ -56,7 +56,7 @@ import java.util.Locale;
  * <li>Run step (1) to generate a set of auto-detected clues;</li>
  * <li>Combine these clues with the caller-dependent "extra clues" 
available;</li>
  * <li>Run step (2) to guess what the most probable answer is.</li>
- * </p>
+ * </ul>
  */
 public class EncodingDetector {
 
@@ -211,9 +211,7 @@ public class EncodingDetector {
 
   /**
    * Guess the encoding with the previously specified list of clues.
-   * 
-   * @param row
-   *          URL's row
+   *
    * @param defaultValue
    *          Default encoding to return if no encoding can be detected with
    *          enough confidence. Note that this will <b>not</b> be normalized
@@ -340,7 +338,7 @@ public class EncodingDetector {
   /**
    * Parse the character encoding from the specified content type header. If 
the
    * content type is null, or there is no explicit character encoding,
-   * <code>null</code> is returned. <br />
+   * <code>null</code> is returned. <p>
    * This method was copied from org.apache.catalina.util.RequestUtil, which is
    * licensed under the Apache License, Version 2.0 (the "License").
    * 

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/util/MimeUtil.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/util/MimeUtil.java 
b/src/java/org/apache/nutch/util/MimeUtil.java
index 198fdee..241087c 100644
--- a/src/java/org/apache/nutch/util/MimeUtil.java
+++ b/src/java/org/apache/nutch/util/MimeUtil.java
@@ -50,7 +50,7 @@ import org.apache.nutch.protocol.ProtocolOutput;
  *        substrate library, <a href="http://incubator.apache.org/tika/">Apache
  *        Tika</a>. Any mime handling code should be placed in this utility
  *        class, and hidden from the Nutch classes that rely on it.
- *        </p>
+ *
  */
 public final class MimeUtil {
 
@@ -229,7 +229,7 @@ public final class MimeUtil {
    * method.
    * 
    * @param url
-   *          A string representation of the document {@link URL} to sense the
+   *          A string representation of the document. URL to sense the
    *          {@link MimeType} for.
    * @return An appropriate {@link MimeType}, identified from the given 
Document
    *         url in string form.

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/util/NodeWalker.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/util/NodeWalker.java 
b/src/java/org/apache/nutch/util/NodeWalker.java
index 16e84c3..3e0b0e1 100644
--- a/src/java/org/apache/nutch/util/NodeWalker.java
+++ b/src/java/org/apache/nutch/util/NodeWalker.java
@@ -27,12 +27,12 @@ import org.w3c.dom.NodeList;
  * of recursion. As the node tree is walked the next node is popped off of the
  * stack and all of its children are automatically added to the stack to be
  * called in tree order.
- * </p>
+ *
  * 
  * <p>
  * Currently this class is not thread safe. It is assumed that only one thread
  * will be accessing the <code>NodeWalker</code> at any given time.
- * </p>
+ *
  */
 public class NodeWalker {
 
@@ -58,7 +58,7 @@ public class NodeWalker {
    * children onto the stack, allowing us to walk the node tree without the use
    * of recursion. If there are no more nodes on the stack then null is
    * returned.
-   * </p>
+   *
    * 
    * @return Node The next <code>Node</code> on the stack or null if there 
isn't
    *         a next node.
@@ -90,12 +90,12 @@ public class NodeWalker {
    * When getting a next node from the walker, that node's children are
    * automatically added to the stack. You can call this method to remove those
    * children from the stack.
-   * </p>
+   *
    * 
    * <p>
    * This is useful when you don't want to process deeper into the current path
    * of the node tree but you want to continue processing sibling nodes.
-   * </p>
+   *
    * 
    */
   public void skipChildren() {

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/util/NutchJob.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/util/NutchJob.java 
b/src/java/org/apache/nutch/util/NutchJob.java
index c0456c1..029e7ae 100644
--- a/src/java/org/apache/nutch/util/NutchJob.java
+++ b/src/java/org/apache/nutch/util/NutchJob.java
@@ -63,7 +63,7 @@ public class NutchJob extends Job {
    * Creates a new {@link NutchJob} with no particular {@link 
org.apache.hadoop.mapreduce.Cluster} and a 
    * given {@link org.apache.hadoop.conf.Configuration}.
    * 
-   * The <code>NutchJob</code> makes a copy of the <code>Configuration</code> 
so 
+   * The <code>NutchJob</code> makes a copy of the <code>Configuration</code> 
so
    * that any necessary internal modifications do not reflect on the incoming 
    * parameter.
    * 
@@ -87,7 +87,7 @@ public class NutchJob extends Job {
    * and a given jobName.
    * A Cluster will be created from the conf parameter only when it's needed.
    *
-   * The <code>NutchJob</code> makes a copy of the <code>Configuration</code> 
so 
+   * The <code>NutchJob</code> makes a copy of the <code>Configuration</code> 
so
    * that any necessary internal modifications do not reflect on the incoming 
    * parameter.
    * 

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/util/NutchTool.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/util/NutchTool.java 
b/src/java/org/apache/nutch/util/NutchTool.java
index 1f5789a..443d1da 100644
--- a/src/java/org/apache/nutch/util/NutchTool.java
+++ b/src/java/org/apache/nutch/util/NutchTool.java
@@ -36,11 +36,19 @@ public abstract class NutchTool extends Configured {
 
   /**
    * Runs the tool, using a map of arguments. May return results, or null.
+   *
+   * @param args map of arguments
+   * @return results or null
+   * @throws Exception
    */
   public abstract Map<String, Object> run(Map<String, Object> args)
       throws Exception;
 
-  /** Returns relative progress of the tool, a float in range [0,1]. */
+  /**
+   * Returns relative progress of the tool, a float in range [0,1]
+   *
+   * @return relative progress of the tool, a float in range [0,1]
+   */
   public float getProgress() {
     float res = 0;
     if (currentJob != null) {
@@ -62,7 +70,11 @@ public abstract class NutchTool extends Configured {
     return res;
   }
 
-  /** Returns current status of the running tool. */
+  /**
+   * Returns current status of the running tool
+   *
+   * @return current status of the running tool
+   */
   public Map<String, Object> getStatus() {
     return status;
   }
@@ -72,6 +84,7 @@ public abstract class NutchTool extends Configured {
    * this, since by default it calls {@link #killJob()}.
    * 
    * @return true if succeeded, false otherwise
+   * @throws Exception
    */
   public boolean stopJob() throws Exception {
     return killJob();

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/util/PrefixStringMatcher.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/util/PrefixStringMatcher.java 
b/src/java/org/apache/nutch/util/PrefixStringMatcher.java
index e323b67..6ca48c8 100644
--- a/src/java/org/apache/nutch/util/PrefixStringMatcher.java
+++ b/src/java/org/apache/nutch/util/PrefixStringMatcher.java
@@ -70,8 +70,8 @@ public class PrefixStringMatcher extends TrieStringMatcher {
   }
 
   /**
-   * Returns the shortest prefix of <code>input<code> that is matched,
-   * or <code>null<code> if no match exists.
+   * Returns the shortest prefix of <code>input</code> that is matched,
+   * or <code>null</code> if no match exists.
    */
   public String shortestMatch(String input) {
     TrieNode node = root;
@@ -86,8 +86,8 @@ public class PrefixStringMatcher extends TrieStringMatcher {
   }
 
   /**
-   * Returns the longest prefix of <code>input<code> that is matched,
-   * or <code>null<code> if no match exists.
+   * Returns the longest prefix of <code>input</code> that is matched,
+   * or <code>null</code> if no match exists.
    */
   public String longestMatch(String input) {
     TrieNode node = root;

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/util/SuffixStringMatcher.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/util/SuffixStringMatcher.java 
b/src/java/org/apache/nutch/util/SuffixStringMatcher.java
index a967c01..6e070b9 100644
--- a/src/java/org/apache/nutch/util/SuffixStringMatcher.java
+++ b/src/java/org/apache/nutch/util/SuffixStringMatcher.java
@@ -65,8 +65,8 @@ public class SuffixStringMatcher extends TrieStringMatcher {
   }
 
   /**
-   * Returns the shortest suffix of <code>input<code> that is matched,
-   * or <code>null<code> if no match exists.
+   * Returns the shortest suffix of <code>input</code> that is matched,
+   * or <code>null</code> if no match exists.
    */
   public String shortestMatch(String input) {
     TrieNode node = root;
@@ -81,8 +81,8 @@ public class SuffixStringMatcher extends TrieStringMatcher {
   }
 
   /**
-   * Returns the longest suffix of <code>input<code> that is matched,
-   * or <code>null<code> if no match exists.
+   * Returns the longest suffix of <code>input</code> that is matched,
+   * or <code>null</code> if no match exists.
    */
   public String longestMatch(String input) {
     TrieNode node = root;

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/util/TableUtil.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/util/TableUtil.java 
b/src/java/org/apache/nutch/util/TableUtil.java
index 68ded69..e6ccbbc 100644
--- a/src/java/org/apache/nutch/util/TableUtil.java
+++ b/src/java/org/apache/nutch/util/TableUtil.java
@@ -33,7 +33,7 @@ public class TableUtil {
    * E.g. "http://bar.foo.com:8983/to/index.html?a=b" becomes
    * "com.foo.bar:8983:http/to/index.html?a=b".
    * 
-   * @param url
+   * @param urlString
    *          url to be reversed
    * @return Reversed url
    * @throws MalformedURLException
@@ -111,7 +111,7 @@ public class TableUtil {
 
   /**
    * Given a reversed url, returns the reversed host E.g
-   * "com.foo.bar:http:8983/to/index.html?a=b" -> "com.foo.bar"
+   * "com.foo.bar:http:8983/to/index.html?a=b" -&gt; "com.foo.bar"
    * 
    * @param reversedUrl
    *          Reversed url

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/util/TimingUtil.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/util/TimingUtil.java 
b/src/java/org/apache/nutch/util/TimingUtil.java
index 524bee6..497716c 100644
--- a/src/java/org/apache/nutch/util/TimingUtil.java
+++ b/src/java/org/apache/nutch/util/TimingUtil.java
@@ -32,7 +32,7 @@ public class TimingUtil {
    * @param end
    *          The end of the time period
    * @return a string of the form "XhYmZs" when the elapsed time is X hours, Y
-   *         minutes and Z seconds or null if start > end.
+   *         minutes and Z seconds or null if start &gt; end.
    */
   public static String elapsedTime(long start, long end) {
     if (start > end) {

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/util/TrieStringMatcher.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/util/TrieStringMatcher.java 
b/src/java/org/apache/nutch/util/TrieStringMatcher.java
index 95f06ad..e7773cb 100644
--- a/src/java/org/apache/nutch/util/TrieStringMatcher.java
+++ b/src/java/org/apache/nutch/util/TrieStringMatcher.java
@@ -186,15 +186,15 @@ public abstract class TrieStringMatcher {
   public abstract boolean matches(String input);
 
   /**
-   * Returns the shortest substring of <code>input<code> that is
-   * matched by a pattern in the trie, or <code>null<code> if no match
+   * Returns the shortest substring of <code>input</code> that is
+   * matched by a pattern in the trie, or <code>null</code> if no match
    * exists.
    */
   public abstract String shortestMatch(String input);
 
   /**
-   * Returns the longest substring of <code>input<code> that is
-   * matched by a pattern in the trie, or <code>null<code> if no match
+   * Returns the longest substring of <code>input</code> that is
+   * matched by a pattern in the trie, or <code>null</code> if no match
    * exists.
    */
   public abstract String longestMatch(String input);

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/util/URLUtil.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/util/URLUtil.java 
b/src/java/org/apache/nutch/util/URLUtil.java
index 5183ba1..e1df9e3 100644
--- a/src/java/org/apache/nutch/util/URLUtil.java
+++ b/src/java/org/apache/nutch/util/URLUtil.java
@@ -219,52 +219,49 @@ public class URLUtil {
    * Yahoo! Slurp crawler described here:<br>
    * <a href=
    * "http://help.yahoo.com/l/nz/yahooxtra/search/webcrawler/slurp-11.html"> 
How
-   * does the Yahoo! webcrawler handle redirects?</a> <br>
+   * does the Yahoo! webcrawler handle redirects?</a>
    * <br>
-   * <ol>
-   * <li>Choose target url if either url is malformed.</li>
-   * <li>If different domains the keep the destination whether or not the
-   * redirect is temp or perm</li>
-   * <ul>
-   * <li>a.com -> b.com*</li>
-   * </ul>
-   * <li>If the redirect is permanent and the source is root, keep the 
source.</li>
-   * <ul>
-   * <li>*a.com -> a.com?y=1 || *a.com -> a.com/xyz/index.html</li>
-   * </ul>
-   * <li>If the redirect is permanent and the source is not root and the
-   * destination is root, keep the destination</li>
-   * <ul>
-   * <li>a.com/xyz/index.html -> a.com*</li>
-   * </ul>
-   * <li>If the redirect is permanent and neither the source nor the 
destination
-   * is root, then keep the destination</li>
-   * <ul>
-   * <li>a.com/xyz/index.html -> a.com/abc/page.html*</li>
-   * </ul>
-   * <li>If the redirect is temporary and source is root and destination is not
-   * root, then keep the source</li>
-   * <ul>
-   * <li>*a.com -> a.com/xyz/index.html</li>
-   * </ul>
-   * <li>If the redirect is temporary and source is not root and destination is
-   * root, then keep the destination</li>
-   * <ul>
-   * <li>a.com/xyz/index.html -> a.com*</li>
-   * </ul>
-   * <li>If the redirect is temporary and neither the source or the destination
+   *
+   * <dl>
+   *
+   * <dt>Choose target url if either url is malformed.</dt>
+   *
+   * <dt>If different domains the keep the destination whether or not the
+   * redirect is temp or perm</dt>
+   * <dd>a.com -&gt; b.com*</dd>
+   *
+   * <dt>If the redirect is permanent and the source is root, keep the 
source.</dt>
+   * <dd>*a.com -&gt; a.com?y=1 || *a.com -&gt; a.com/xyz/index.html</dd>
+   *
+   * <dt>If the redirect is permanent and the source is not root and the
+   * destination is root, keep the destination</dt>
+   * <dd>a.com/xyz/index.html -&gt; a.com*</dd>
+   *
+   * <dt>If the redirect is permanent and neither the source nor the 
destination
+   * is root, then keep the destination</dt>
+   * <dd>a.com/xyz/index.html -&gt; a.com/abc/page.html*</dd>
+   *
+   * <dt>If the redirect is temporary and source is root and destination is not
+   * root, then keep the source</dt>
+   * <dd>*a.com -&gt; a.com/xyz/index.html</dd>
+   *
+   * <dt>If the redirect is temporary and source is not root and destination is
+   * root, then keep the destination</dt>
+   * <dd>a.com/xyz/index.html -&gt; a.com*</dd>
+   *
+   * <dt>If the redirect is temporary and neither the source or the destination
    * is root, then keep the shortest url. First check for the shortest host, 
and
    * if both are equal then check by path. Path is first by length then by the
-   * number of / path separators.</li>
-   * <ul>
-   * <li>a.com/xyz/index.html -> a.com/abc/page.html*</li>
-   * <li>*www.a.com/xyz/index.html -> www.news.a.com/xyz/index.html</li>
-   * </ul>
-   * <li>If the redirect is temporary and both the source and the destination
-   * are root, then keep the shortest sub-domain</li>
-   * <ul>
-   * <li>*www.a.com -> www.news.a.com</li>
-   * </ul>
+   * number of / path separators.</dt>
+   * <dd>a.com/xyz/index.html -&gt; a.com/abc/page.html*</dd>
+   * <dd>*www.a.com/xyz/index.html -&gt; www.news.a.com/xyz/index.html</dd>
+   *
+   * <dt>If the redirect is temporary and both the source and the destination
+   * are root, then keep the shortest sub-domain</dt>
+   * <dd>*www.a.com -&gt; www.news.a.com</dd>
+   *
+   * </dl>
+   *
    * <br>
    * While not in this logic there is a further piece of representative url
    * logic that occurs during indexing and after scoring. During creation of 
the

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/util/domain/DomainSuffix.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/util/domain/DomainSuffix.java 
b/src/java/org/apache/nutch/util/domain/DomainSuffix.java
index ae03ec4..0e0b7b0 100644
--- a/src/java/org/apache/nutch/util/domain/DomainSuffix.java
+++ b/src/java/org/apache/nutch/util/domain/DomainSuffix.java
@@ -23,17 +23,16 @@ package org.apache.nutch.util.domain;
  * name of a host. The domain name of a host is defined to be the last part
  * before the domain suffix, w/o subdomain names. As an example the domain name
  * of <br>
- * <code> http://lucene.apache.org/ 
+ * <code> http://lucene.apache.org/
  * </code><br>
  * is <code> apache.org</code> <br>
  * This class holds three fields, <strong>domain</strong> field represents the
  * suffix (such as "co.uk") <strong>boost</strong> is a float for boosting 
score
  * of url's with this suffix <strong>status</strong> field represents domain's
- * status
+ * status. Check also domain-suffixes.xml
  * 
  * @author Enis Soztutar &lt;[email protected]&gt;
  * @see TopLevelDomain
- * @see domain-suffixes.xml
  */
 public class DomainSuffix {
 

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/util/domain/TopLevelDomain.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/util/domain/TopLevelDomain.java 
b/src/java/org/apache/nutch/util/domain/TopLevelDomain.java
index 6386335..87e370e 100644
--- a/src/java/org/apache/nutch/util/domain/TopLevelDomain.java
+++ b/src/java/org/apache/nutch/util/domain/TopLevelDomain.java
@@ -24,8 +24,8 @@ package org.apache.nutch.util.domain;
  * top-level domain is <code>com</code>.
  * 
  * @author Enis Soztutar &lt;[email protected]&gt;
- * @see http://www.iana.org/
- * @see http://en.wikipedia.org/wiki/Top-level_domain
+ * @see <a href="http://www.iana.org/">http://www.iana.org/</a>
+ * @see <a href="http://en.wikipedia.org/wiki/Top-level_domain">http://en.wikipedia.org/wiki/Top-level_domain</a>
  */
 public class TopLevelDomain extends DomainSuffix {
 

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/plugin/feed/src/java/org/apache/nutch/parse/feed/FeedParser.java
----------------------------------------------------------------------
diff --git 
a/src/plugin/feed/src/java/org/apache/nutch/parse/feed/FeedParser.java 
b/src/plugin/feed/src/java/org/apache/nutch/parse/feed/FeedParser.java
index 9df4e27..0751ddc 100644
--- a/src/plugin/feed/src/java/org/apache/nutch/parse/feed/FeedParser.java
+++ b/src/plugin/feed/src/java/org/apache/nutch/parse/feed/FeedParser.java
@@ -64,7 +64,7 @@ import com.sun.syndication.io.SyndFeedInput;
  * <p>
  * A new RSS/ATOM Feed{@link Parser} that rapidly parses all referenced links
  * and content present in the feed.
- * </p>
+ *
  * 
  */
 public class FeedParser implements Parser {

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/AnchorIndexingFilter.java
----------------------------------------------------------------------
diff --git 
a/src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/AnchorIndexingFilter.java
 
b/src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/AnchorIndexingFilter.java
index 9e2e75b..7e0e246 100644
--- 
a/src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/AnchorIndexingFilter.java
+++ 
b/src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/AnchorIndexingFilter.java
@@ -36,7 +36,7 @@ import java.util.Locale;
  * Indexing filter that offers an option to either index all inbound anchor 
text
  * for a document or deduplicate anchors. Deduplication does have it's con's,
  * 
- * @see {@code anchorIndexingFilter.deduplicate} in nutch-default.xml.
+ * Check {@code anchorIndexingFilter.deduplicate} in nutch-default.xml.
  */
 public class AnchorIndexingFilter implements IndexingFilter {
 

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/plugin/index-metadata/src/java/org/apache/nutch/indexer/metadata/MetadataIndexer.java
----------------------------------------------------------------------
diff --git 
a/src/plugin/index-metadata/src/java/org/apache/nutch/indexer/metadata/MetadataIndexer.java
 
b/src/plugin/index-metadata/src/java/org/apache/nutch/indexer/metadata/MetadataIndexer.java
index fdd3b81..a97e9ed 100644
--- 
a/src/plugin/index-metadata/src/java/org/apache/nutch/indexer/metadata/MetadataIndexer.java
+++ 
b/src/plugin/index-metadata/src/java/org/apache/nutch/indexer/metadata/MetadataIndexer.java
@@ -37,7 +37,7 @@ import org.apache.nutch.util.Bytes;
  * Indexer which can be configured to extract metadata from the crawldb, parse
  * metadata or content metadata. You can specify the properties "index.db",
  * "index.parse" or "index.content" who's values are comma-delimited
- * <value>key1,key2,key3</value>.
+ * &lt;value&gt;key1,key2,key3&lt;/value&gt;.
  */
 
 public class MetadataIndexer implements IndexingFilter {

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
----------------------------------------------------------------------
diff --git 
a/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
 
b/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
index b1d99e5..9171b1c 100644
--- 
a/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
+++ 
b/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
@@ -148,14 +148,14 @@ public class MoreIndexingFilter implements IndexingFilter 
{
    * primaryType and subType to field "type" as un-stored, indexed and
    * un-tokenized, so that search results can be confined by contentType or its
    * primaryType or its subType.
-   * </p>
+   *
    * <p>
    * For example, if contentType is application/vnd.ms-powerpoint, search can be
    * done with one of the following qualifiers
    * type:application/vnd.ms-powerpoint type:application type:vnd.ms-powerpoint
    * all case insensitive. The query filter is implemented in
    * {@link TypeQueryFilter}.
-   * </p>
+   *
    * 
    * @param doc
    * @param data

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/plugin/language-identifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java
----------------------------------------------------------------------
diff --git 
a/src/plugin/language-identifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java
 
b/src/plugin/language-identifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java
index f3af6a9..064cd8d 100644
--- 
a/src/plugin/language-identifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java
+++ 
b/src/plugin/language-identifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java
@@ -84,12 +84,19 @@ public class HTMLLanguageParser implements ParseFilter {
 
   /**
    * Scan the HTML document looking at possible indications of content language<br>
-   * <li>1. html lang attribute
-   * (http://www.w3.org/TR/REC-html40/struct/dirlang.html#h-8.1) <li>2. meta
+   * <ol>
+   * <li>html lang attribute
+   * (http://www.w3.org/TR/REC-html40/struct/dirlang.html#h-8.1)
+   * </li>
+   * <li>meta
    * dc.language
    * (http://dublincore.org/documents/2000/07/16/usageguide/qualified
-   * -html.shtml#language) <li>3. meta http-equiv (content-language)
-   * (http://www.w3.org/TR/REC-html40/struct/global.html#h-7.4.4.2) <br>
+   * -html.shtml#language)
+   * </li>
+   * <li>meta http-equiv (content-language)
+   * (http://www.w3.org/TR/REC-html40/struct/global.html#h-7.4.4.2)
+   * </li>
+   * </ol>
    */
   public Parse filter(String url, WebPage page, Parse parse,
       HTMLMetaTags metaTags, DocumentFragment doc) {

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/RegexURLFilterBase.java
----------------------------------------------------------------------
diff --git 
a/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/RegexURLFilterBase.java
 
b/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/RegexURLFilterBase.java
index d374e95..a1475a7 100644
--- 
a/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/RegexURLFilterBase.java
+++ 
b/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/RegexURLFilterBase.java
@@ -44,19 +44,13 @@ import org.apache.nutch.net.*;
  * expressions.
  * 
  * <p>
- * The regular expressions rules are expressed in a file. The file of rules is
- * provided by each implementation using the
- * {@link #getRulesFile(Configuration)} method.
- * </p>
- * 
- * <p>
- * The format of this file is made of many rules (one per line):<br/>
+ * The format of this file is made of many rules (one per line):<br>
  * <code>
  * [+-]&lt;regex&gt;
- * </code><br/>
+ * </code><br>
  * where plus (<code>+</code>)means go ahead and index it and minus (
  * <code>-</code>)means no.
- * </p>
+ *
  */
 public abstract class RegexURLFilterBase implements URLFilter {
 

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMBuilder.java
----------------------------------------------------------------------
diff --git 
a/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMBuilder.java 
b/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMBuilder.java
index 31b54da..6bd4305 100644
--- a/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMBuilder.java
+++ b/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMBuilder.java
@@ -174,7 +174,7 @@ public class DOMBuilder implements ContentHandler, 
LexicalHandler {
    * supply a locator: if it does so, it must supply the locator to the
    * application by invoking this method before invoking any of the other
    * methods in the ContentHandler interface.
-   * </p>
+   *
    * 
    * <p>
    * The locator allows the application to determine the end position of any
@@ -183,13 +183,13 @@ public class DOMBuilder implements ContentHandler, 
LexicalHandler {
    * errors (such as character content that does not match an application's
    * business rules). The information returned by the locator is probably not
    * sufficient for use with a search engine.
-   * </p>
+   *
    * 
    * <p>
    * Note that the locator will return correct information only during the
    * invocation of the events in this interface. The application should not
    * attempt to use it at any other time.
-   * </p>
+   *
    * 
    * @param locator
    *          An object that can return the location of any SAX document event.
@@ -206,7 +206,7 @@ public class DOMBuilder implements ContentHandler, 
LexicalHandler {
    * <p>
    * The SAX parser will invoke this method only once, before any other methods
    * in this interface or in DTDHandler (except for setDocumentLocator).
-   * </p>
+   *
    */
   public void startDocument() throws org.xml.sax.SAXException {
 
@@ -221,7 +221,7 @@ public class DOMBuilder implements ContentHandler, 
LexicalHandler {
    * method invoked during the parse. The parser shall not invoke this method
    * until it has either abandoned parsing (because of an unrecoverable error)
    * or reached the end of input.
-   * </p>
+   *
    */
   public void endDocument() throws org.xml.sax.SAXException {
 
@@ -237,14 +237,14 @@ public class DOMBuilder implements ContentHandler, 
LexicalHandler {
    * startElement() event (even when the element is empty). All of the 
element's
    * content will be reported, in order, before the corresponding endElement()
    * event.
-   * </p>
+   *
    * 
    * <p>
    * If the element name has a namespace prefix, the prefix will still be
    * attached. Note that the attribute list provided will contain only
    * attributes with explicit values (specified or defaulted): #IMPLIED
    * attributes will be omitted.
-   * </p>
+   *
    * 
    * 
    * @param ns
@@ -328,12 +328,12 @@ public class DOMBuilder implements ContentHandler, 
LexicalHandler {
    * The SAX parser will invoke this method at the end of every element in the
    * XML document; there will be a corresponding startElement() event for every
    * endElement() event (even when the element is empty).
-   * </p>
+   *
    * 
    * <p>
    * If the element name has a namespace prefix, the prefix will still be
    * attached to the name.
-   * </p>
+   *
    * 
    * 
    * @param ns
@@ -371,18 +371,18 @@ public class DOMBuilder implements ContentHandler, 
LexicalHandler {
    * they may split it into several chunks; however, all of the characters in
    * any single event must come from the same external entity, so that the
    * Locator provides useful information.
-   * </p>
+   *
    * 
    * <p>
    * The application must not attempt to read from the array outside of the
    * specified range.
-   * </p>
+   *
    * 
    * <p>
    * Note that some parsers will report whitespace using the
    * ignorableWhitespace() method rather than this one (validating parsers must
    * do so).
-   * </p>
+   *
    * 
    * @param ch
    *          The characters from the XML document.
@@ -489,19 +489,19 @@ public class DOMBuilder implements ContentHandler, 
LexicalHandler {
    * whitespace (see the W3C XML 1.0 recommendation, section 2.10):
    * non-validating parsers may also use this method if they are capable of
    * parsing and using content models.
-   * </p>
+   *
    * 
    * <p>
    * SAX parsers may return all contiguous whitespace in a single chunk, or 
they
    * may split it into several chunks; however, all of the characters in any
    * single event must come from the same external entity, so that the Locator
    * provides useful information.
-   * </p>
+   *
    * 
    * <p>
    * The application must not attempt to read from the array outside of the
    * specified range.
-   * </p>
+   *
    * 
    * @param ch
    *          The characters from the XML document.
@@ -539,12 +539,12 @@ public class DOMBuilder implements ContentHandler, 
LexicalHandler {
    * The Parser will invoke this method once for each processing instruction
    * found: note that processing instructions may occur before or after the 
main
    * document element.
-   * </p>
+   *
    * 
    * <p>
    * A SAX parser should never report an XML declaration (XML 1.0, section 2.8)
    * or a text declaration (XML 1.0, section 4.3.1) using this method.
-   * </p>
+   *
    * 
    * @param target
    *          The processing instruction target.
@@ -608,18 +608,18 @@ public class DOMBuilder implements ContentHandler, 
LexicalHandler {
    * they may split it into several chunks; however, all of the characters in
    * any single event must come from the same external entity, so that the
    * Locator provides useful information.
-   * </p>
+   *
    * 
    * <p>
    * The application must not attempt to read from the array outside of the
    * specified range.
-   * </p>
+   *
    * 
    * <p>
    * Note that some parsers will report whitespace using the
    * ignorableWhitespace() method rather than this one (validating parsers must
    * do so).
-   * </p>
+   *
    * 
    * @param ch
    *          The characters from the XML document.
@@ -687,14 +687,14 @@ public class DOMBuilder implements ContentHandler, 
LexicalHandler {
    * processing: the SAX XML reader will automatically replace prefixes for
    * element and attribute names when the 
http://xml.org/sax/features/namespaces
    * feature is true (the default).
-   * </p>
+   *
    * 
    * <p>
    * There are cases, however, when applications need to use prefixes in
    * character data or in attribute values, where they cannot safely be 
expanded
    * automatically; the start/endPrefixMapping event supplies the information 
to
    * the application to expand prefixes in those contexts itself, if necessary.
-   * </p>
+   *
    * 
    * <p>
    * Note that start/endPrefixMapping events are not guaranteed to be properly
@@ -702,7 +702,7 @@ public class DOMBuilder implements ContentHandler, 
LexicalHandler {
    * before the corresponding startElement event, and all endPrefixMapping
    * events will occur after the corresponding endElement event, but their 
order
    * is not guaranteed.
-   * </p>
+   *
    * 
    * @param prefix
    *          The Namespace prefix being declared.
@@ -735,7 +735,7 @@ public class DOMBuilder implements ContentHandler, 
LexicalHandler {
    * See startPrefixMapping for details. This event will always occur after the
    * corresponding endElement event, but the order of endPrefixMapping events 
is
    * not otherwise guaranteed.
-   * </p>
+   *
    * 
    * @param prefix
    *          The prefix that was being mapped.
@@ -755,7 +755,7 @@ public class DOMBuilder implements ContentHandler, 
LexicalHandler {
    * DTD subset). All processors may skip external entities, depending on the
    * values of the http://xml.org/sax/features/external-general-entities and 
the
    * http://xml.org/sax/features/external-parameter-entities properties.
-   * </p>
+   *
    * 
    * @param name
    *          The name of the skipped entity. If it is a parameter entity, the

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMContentUtils.java
----------------------------------------------------------------------
diff --git 
a/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMContentUtils.java
 
b/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMContentUtils.java
index 8e079fb..488cacd 100644
--- 
a/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMContentUtils.java
+++ 
b/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMContentUtils.java
@@ -113,7 +113,7 @@ public class DOMContentUtils {
 
   /**
    * This is a convenience method, equivalent to
-   * {@link #getText(StringBuffer,Node,boolean) getText(sb, node, false)}.
+   * {@link #getText(StringBuilder, Node, boolean)} which passes false as third argument
    * 
    */
   public void getText(StringBuilder sb, Node node) {

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/XMLCharacterRecognizer.java
----------------------------------------------------------------------
diff --git 
a/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/XMLCharacterRecognizer.java
 
b/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/XMLCharacterRecognizer.java
index cfef10c..0143f06 100644
--- 
a/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/XMLCharacterRecognizer.java
+++ 
b/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/XMLCharacterRecognizer.java
@@ -35,7 +35,7 @@ public class XMLCharacterRecognizer {
    * Returns whether the specified <var>ch</var> conforms to the XML 1.0
    * definition of whitespace. Refer to <A
    * href="http://www.w3.org/TR/1998/REC-xml-19980210#NT-S"> the definition of
-   * <CODE>S</CODE></A> for details.
+   * <code>S</code></A> for details.
    * 
    * @param ch
    *          Character to check as XML whitespace.

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java
----------------------------------------------------------------------
diff --git 
a/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java 
b/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java
index a481755..a685844 100644
--- a/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java
+++ b/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java
@@ -79,10 +79,10 @@ public class JSParseFilter implements ParseFilter, Parser {
    *          {@link WebPage} object relative to the URL
    * @param parse
    *          {@link Parse} object holding parse status
-   * @param metatags
-   *          within the {@link NutchDocument}
+   * @param metaTags
+   *          within the {@link HTMLMetaTags}
    * @param doc
-   *          The {@link NutchDocument} object
+   *          The {@link DocumentFragment} object
    * @return parse the actual {@link Parse} object
    */
   @Override

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java
----------------------------------------------------------------------
diff --git 
a/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java 
b/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java
index a3f779a..4fbcad3 100644
--- a/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java
+++ b/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java
@@ -280,7 +280,7 @@ class ExtractText extends SWFTagTypesImpl {
 
     /*
      * There are some issues with this method: sometimes SWF files define their
-     * own font, so short of OCR we cannot guess what is the glyph code -> character
+     * own font, so short of OCR we cannot guess what is the glyph code -&gt; character
      * mapping. Additionally, some files don't use literal space character, instead
      * they adjust glyphAdvances. We don't handle it at all - in such cases the text
      * will be all glued together.

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/DOMBuilder.java
----------------------------------------------------------------------
diff --git 
a/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/DOMBuilder.java 
b/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/DOMBuilder.java
index 4f4c8a7..db59d13 100644
--- a/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/DOMBuilder.java
+++ b/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/DOMBuilder.java
@@ -174,7 +174,7 @@ class DOMBuilder implements ContentHandler, LexicalHandler {
    * supply a locator: if it does so, it must supply the locator to the
    * application by invoking this method before invoking any of the other
    * methods in the ContentHandler interface.
-   * </p>
+   *
    * 
    * <p>
    * The locator allows the application to determine the end position of any
@@ -183,13 +183,13 @@ class DOMBuilder implements ContentHandler, 
LexicalHandler {
    * errors (such as character content that does not match an application's
    * business rules). The information returned by the locator is probably not
    * sufficient for use with a search engine.
-   * </p>
+   *
    * 
    * <p>
    * Note that the locator will return correct information only during the
    * invocation of the events in this interface. The application should not
    * attempt to use it at any other time.
-   * </p>
+   *
    * 
    * @param locator
    *          An object that can return the location of any SAX document event.
@@ -206,7 +206,7 @@ class DOMBuilder implements ContentHandler, LexicalHandler {
    * <p>
    * The SAX parser will invoke this method only once, before any other methods
    * in this interface or in DTDHandler (except for setDocumentLocator).
-   * </p>
+   *
    */
   public void startDocument() throws org.xml.sax.SAXException {
 
@@ -221,7 +221,7 @@ class DOMBuilder implements ContentHandler, LexicalHandler {
    * method invoked during the parse. The parser shall not invoke this method
    * until it has either abandoned parsing (because of an unrecoverable error)
    * or reached the end of input.
-   * </p>
+   *
    */
   public void endDocument() throws org.xml.sax.SAXException {
 
@@ -237,14 +237,14 @@ class DOMBuilder implements ContentHandler, 
LexicalHandler {
    * startElement() event (even when the element is empty). All of the 
element's
    * content will be reported, in order, before the corresponding endElement()
    * event.
-   * </p>
+   *
    * 
    * <p>
    * If the element name has a namespace prefix, the prefix will still be
    * attached. Note that the attribute list provided will contain only
    * attributes with explicit values (specified or defaulted): #IMPLIED
    * attributes will be omitted.
-   * </p>
+   *
    * 
    * 
    * @param ns
@@ -328,12 +328,12 @@ class DOMBuilder implements ContentHandler, 
LexicalHandler {
    * The SAX parser will invoke this method at the end of every element in the
    * XML document; there will be a corresponding startElement() event for every
    * endElement() event (even when the element is empty).
-   * </p>
+   *
    * 
    * <p>
    * If the element name has a namespace prefix, the prefix will still be
    * attached to the name.
-   * </p>
+   *
    * 
    * 
    * @param ns
@@ -373,18 +373,18 @@ class DOMBuilder implements ContentHandler, 
LexicalHandler {
    * they may split it into several chunks; however, all of the characters in
    * any single event must come from the same external entity, so that the
    * Locator provides useful information.
-   * </p>
+   *
    * 
    * <p>
    * The application must not attempt to read from the array outside of the
    * specified range.
-   * </p>
+   *
    * 
    * <p>
    * Note that some parsers will report whitespace using the
    * ignorableWhitespace() method rather than this one (validating parsers must
    * do so).
-   * </p>
+   *
    * 
    * @param ch
    *          The characters from the XML document.
@@ -491,19 +491,19 @@ class DOMBuilder implements ContentHandler, 
LexicalHandler {
    * whitespace (see the W3C XML 1.0 recommendation, section 2.10):
    * non-validating parsers may also use this method if they are capable of
    * parsing and using content models.
-   * </p>
+   *
    * 
    * <p>
    * SAX parsers may return all contiguous whitespace in a single chunk, or 
they
    * may split it into several chunks; however, all of the characters in any
    * single event must come from the same external entity, so that the Locator
    * provides useful information.
-   * </p>
+   *
    * 
    * <p>
    * The application must not attempt to read from the array outside of the
    * specified range.
-   * </p>
+   *
    * 
    * @param ch
    *          The characters from the XML document.
@@ -541,12 +541,12 @@ class DOMBuilder implements ContentHandler, 
LexicalHandler {
    * The Parser will invoke this method once for each processing instruction
    * found: note that processing instructions may occur before or after the 
main
    * document element.
-   * </p>
+   *
    * 
    * <p>
    * A SAX parser should never report an XML declaration (XML 1.0, section 2.8)
    * or a text declaration (XML 1.0, section 4.3.1) using this method.
-   * </p>
+   *
    * 
    * @param target
    *          The processing instruction target.
@@ -610,18 +610,18 @@ class DOMBuilder implements ContentHandler, 
LexicalHandler {
    * they may split it into several chunks; however, all of the characters in
    * any single event must come from the same external entity, so that the
    * Locator provides useful information.
-   * </p>
+   *
    * 
    * <p>
    * The application must not attempt to read from the array outside of the
    * specified range.
-   * </p>
+   *
    * 
    * <p>
    * Note that some parsers will report whitespace using the
    * ignorableWhitespace() method rather than this one (validating parsers must
    * do so).
-   * </p>
+   *
    * 
    * @param ch
    *          The characters from the XML document.
@@ -689,14 +689,14 @@ class DOMBuilder implements ContentHandler, 
LexicalHandler {
    * processing: the SAX XML reader will automatically replace prefixes for
    * element and attribute names when the 
http://xml.org/sax/features/namespaces
    * feature is true (the default).
-   * </p>
+   *
    * 
    * <p>
    * There are cases, however, when applications need to use prefixes in
    * character data or in attribute values, where they cannot safely be 
expanded
    * automatically; the start/endPrefixMapping event supplies the information 
to
    * the application to expand prefixes in those contexts itself, if necessary.
-   * </p>
+   *
    * 
    * <p>
    * Note that start/endPrefixMapping events are not guaranteed to be properly
@@ -704,7 +704,7 @@ class DOMBuilder implements ContentHandler, LexicalHandler {
    * before the corresponding startElement event, and all endPrefixMapping
    * events will occur after the corresponding endElement event, but their 
order
    * is not guaranteed.
-   * </p>
+   *
    * 
    * @param prefix
    *          The Namespace prefix being declared.
@@ -737,7 +737,7 @@ class DOMBuilder implements ContentHandler, LexicalHandler {
    * See startPrefixMapping for details. This event will always occur after the
    * corresponding endElement event, but the order of endPrefixMapping events 
is
    * not otherwise guaranteed.
-   * </p>
+   *
    * 
    * @param prefix
    *          The prefix that was being mapped.
@@ -757,7 +757,7 @@ class DOMBuilder implements ContentHandler, LexicalHandler {
    * DTD subset). All processors may skip external entities, depending on the
    * values of the http://xml.org/sax/features/external-general-entities and 
the
    * http://xml.org/sax/features/external-parameter-entities properties.
-   * </p>
+   *
    * 
    * @param name
    *          The name of the skipped entity. If it is a parameter entity, the

http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/XMLCharacterRecognizer.java
----------------------------------------------------------------------
diff --git 
a/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/XMLCharacterRecognizer.java
 
b/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/XMLCharacterRecognizer.java
index d625c33..b5c95ce 100644
--- 
a/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/XMLCharacterRecognizer.java
+++ 
b/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/XMLCharacterRecognizer.java
@@ -35,7 +35,7 @@ class XMLCharacterRecognizer {
    * Returns whether the specified <var>ch</var> conforms to the XML 1.0
    * definition of whitespace. Refer to <A
    * href="http://www.w3.org/TR/1998/REC-xml-19980210#NT-S";> the definition of
-   * <CODE>S</CODE></A> for details.
+   * <code>S</code></A> for details.
    * 
    * @param ch
    *          Character to check as XML whitespace.

Reply via email to