This is an automated email from the ASF dual-hosted git repository.

snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git


The following commit(s) were added to refs/heads/master by this push:
     new a92878df1 NUTCH-2972 Javadoc build fails using JDK 17 - fix Javadoc issues when building with JDK 17
a92878df1 is described below

commit a92878df1ea586057dc8bc7e9ade376a9b8edc20
Author: Sebastian Nagel <sna...@apache.org>
AuthorDate: Fri Feb 24 17:16:27 2023 +0100

    NUTCH-2972 Javadoc build fails using JDK 17
    - fix Javadoc issues when building with JDK 17
---
 src/java/org/apache/nutch/segment/SegmentMerger.java     | 14 ++++++++------
 src/java/org/apache/nutch/tools/arc/ArcRecordReader.java | 16 +++++++---------
 .../apache/nutch/urlfilter/suffix/SuffixURLFilter.java   |  8 +++++---
 3 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/src/java/org/apache/nutch/segment/SegmentMerger.java b/src/java/org/apache/nutch/segment/SegmentMerger.java
index 056df3c88..6bb90e472 100644
--- a/src/java/org/apache/nutch/segment/SegmentMerger.java
+++ b/src/java/org/apache/nutch/segment/SegmentMerger.java
@@ -76,7 +76,9 @@ import org.apache.nutch.util.NutchJob;
  * <p>
  * Also, it's possible to slice the resulting segment into chunks of fixed size.
  * </p>
- * <h3>Important Notes</h3> <h4>Which parts are merged?</h4>
+ * <section>
+ * <h2>Important Notes</h2>
+ * <h3>Which parts are merged?</h3>
  * <p>
  * It doesn't make sense to merge data from segments, which are at different
  * stages of processing (e.g. one unfetched segment, one fetched but not parsed,
@@ -87,14 +89,14 @@ import org.apache.nutch.util.NutchJob;
  * fall back to just merging fetchlists, and it will skip all other data from
  * all segments.
  * </p>
- * <h4>Merging fetchlists</h4>
+ * <h3>Merging fetchlists</h3>
  * <p>
  * Merging segments, which contain just fetchlists (i.e. prior to fetching) is
  * not recommended, because this tool (unlike the
  * {@link org.apache.nutch.crawl.Generator} doesn't ensure that fetchlist parts
  * for each map task are disjoint.
  * </p>
- * <h4>Duplicate content</h4>
+ * <h3>Duplicate content</h3>
  * Merging segments removes older content whenever possible (see below).
  * However, this is NOT the same as de-duplication, which in addition removes
  * identical content found at different URL-s. In other words, running
@@ -108,15 +110,15 @@ import org.apache.nutch.util.NutchJob;
  * segments be named in an increasing lexicographic order as their creation time
  * increases.
  * </p>
- * <h4>Merging and indexes</h4>
+ * <h3>Merging and indexes</h3>
  * <p>
  * Merged segment gets a different name. Since Indexer embeds segment names in
  * indexes, any indexes originally created for the input segments will NOT work
  * with the merged segment. Newly created merged segment(s) need to be indexed
  * afresh. This tool doesn't use existing indexes in any way, so if you plan to
  * merge segments you don't have to index them prior to merging.
- * 
- * @author Andrzej Bialecki
+ * </p>
+ * </section>
  */
 public class SegmentMerger extends Configured implements Tool{
   private static final Logger LOG = LoggerFactory
diff --git a/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java b/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java
index 0a93947e4..b514a63fc 100644
--- a/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java
+++ b/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java
@@ -38,19 +38,17 @@ import org.apache.hadoop.util.ReflectionUtils;
 /**
  * The <code>ArchRecordReader</code> class provides a record reader which reads
  * records from arc files.
- * <p>
+ * 
  * Arc files are essentially tars of gzips. Each record in an arc file is a
  * compressed gzip. Multiple records are concatenated together to form a
- * complete arc.</p> 
- * <p>For more information on the arc file format 
- * @see <a href='http://www.archive.org/web/researcher/ArcFileFormat.php'>ArcFileFormat</a>.
- * </p>
+ * complete arc.
  * 
- * <p>
- * Arc files are used by the internet archive and grub projects.
- * </p>
+ * For more information on the arc file format 
+ * @see <a href='http://www.archive.org/web/researcher/ArcFileFormat.php'>ArcFileFormat</a>.
+
+ * Arc files are used by the Internet Archive and grub projects.
  * 
- * @see <a href='http://www.archive.org/'>archive.org</a> 
+ * @see <a href='https://www.archive.org/'>archive.org</a>
  * @see <a href='http://www.grub.org/'>grub.org</a>
  */
 public class ArcRecordReader extends RecordReader<Text, BytesWritable> {
diff --git a/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java b/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
index dd8605f79..5edf5fc38 100644
--- a/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
+++ b/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
@@ -78,6 +78,9 @@ import java.net.MalformedURLException;
  * expressions, it only accepts literal suffixes. I.e. a suffix "+*.jpg" is most
  * probably wrong, you should use "+.jpg" instead.
  * </p>
+ * 
+ * <section>
+ * <h2>Examples</h2>
  * <h3>Example 1</h3>
  * <p>
  * The configuration shown below will accept all URLs with '.html' or '.htm'
@@ -96,7 +99,7 @@ import java.net.MalformedURLException;
  *  .htm
  * </pre>
  * 
- * <h4>Example 2</h4>
+ * <h3>Example 2</h3>
  * <p>
  * The configuration shown below will accept all URLs except common graphical
  * formats.
@@ -115,8 +118,7 @@ import java.net.MalformedURLException;
  *  .jpeg
  *  .bmp
  * </pre>
- *  
- * @author Andrzej Bialecki
+ * </section>
  */
 public class SuffixURLFilter implements URLFilter {
 

Reply via email to