This is an automated email from the ASF dual-hosted git repository. snagel pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push: new a92878df1 NUTCH-2972 Javadoc build fails using JDK 17 - fix Javadoc issues when building with JDK 17 a92878df1 is described below commit a92878df1ea586057dc8bc7e9ade376a9b8edc20 Author: Sebastian Nagel <sna...@apache.org> AuthorDate: Fri Feb 24 17:16:27 2023 +0100 NUTCH-2972 Javadoc build fails using JDK 17 - fix Javadoc issues when building with JDK 17 --- src/java/org/apache/nutch/segment/SegmentMerger.java | 14 ++++++++------ src/java/org/apache/nutch/tools/arc/ArcRecordReader.java | 16 +++++++--------- .../apache/nutch/urlfilter/suffix/SuffixURLFilter.java | 8 +++++--- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/src/java/org/apache/nutch/segment/SegmentMerger.java b/src/java/org/apache/nutch/segment/SegmentMerger.java index 056df3c88..6bb90e472 100644 --- a/src/java/org/apache/nutch/segment/SegmentMerger.java +++ b/src/java/org/apache/nutch/segment/SegmentMerger.java @@ -76,7 +76,9 @@ import org.apache.nutch.util.NutchJob; * <p> * Also, it's possible to slice the resulting segment into chunks of fixed size. * </p> - * <h3>Important Notes</h3> <h4>Which parts are merged?</h4> + * <section> + * <h2>Important Notes</h2> + * <h3>Which parts are merged?</h3> * <p> * It doesn't make sense to merge data from segments, which are at different * stages of processing (e.g. one unfetched segment, one fetched but not parsed, @@ -87,14 +89,14 @@ import org.apache.nutch.util.NutchJob; * fall back to just merging fetchlists, and it will skip all other data from * all segments. * </p> - * <h4>Merging fetchlists</h4> + * <h3>Merging fetchlists</h3> * <p> * Merging segments, which contain just fetchlists (i.e. prior to fetching) is * not recommended, because this tool (unlike the * {@link org.apache.nutch.crawl.Generator} doesn't ensure that fetchlist parts * for each map task are disjoint. * </p> - * <h4>Duplicate content</h4> + * <h3>Duplicate content</h3> * Merging segments removes older content whenever possible (see below). * However, this is NOT the same as de-duplication, which in addition removes * identical content found at different URL-s. In other words, running @@ -108,15 +110,15 @@ import org.apache.nutch.util.NutchJob; * segments be named in an increasing lexicographic order as their creation time * increases. * </p> - * <h4>Merging and indexes</h4> + * <h3>Merging and indexes</h3> * <p> * Merged segment gets a different name. Since Indexer embeds segment names in * indexes, any indexes originally created for the input segments will NOT work * with the merged segment. Newly created merged segment(s) need to be indexed * afresh. This tool doesn't use existing indexes in any way, so if you plan to * merge segments you don't have to index them prior to merging. - * - * @author Andrzej Bialecki + * </p> + * </section> */ public class SegmentMerger extends Configured implements Tool{ private static final Logger LOG = LoggerFactory diff --git a/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java b/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java index 0a93947e4..b514a63fc 100644 --- a/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java +++ b/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java @@ -38,19 +38,17 @@ import org.apache.hadoop.util.ReflectionUtils; /** * The <code>ArchRecordReader</code> class provides a record reader which reads * records from arc files. - * <p> + * * Arc files are essentially tars of gzips. Each record in an arc file is a * compressed gzip. Multiple records are concatenated together to form a - * complete arc.</p> - * <p>For more information on the arc file format - * @see <a href='http://www.archive.org/web/researcher/ArcFileFormat.php'>ArcFileFormat</a>. - * </p> + * complete arc. * - * <p> - * Arc files are used by the internet archive and grub projects. - * </p> + * For more information on the arc file format + * @see <a href='http://www.archive.org/web/researcher/ArcFileFormat.php'>ArcFileFormat</a>. + + * Arc files are used by the Internet Archive and grub projects. * - * @see <a href='http://www.archive.org/'>archive.org</a> + * @see <a href='https://www.archive.org/'>archive.org</a> * @see <a href='http://www.grub.org/'>grub.org</a> */ public class ArcRecordReader extends RecordReader<Text, BytesWritable> { diff --git a/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java b/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java index dd8605f79..5edf5fc38 100644 --- a/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java +++ b/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java @@ -78,6 +78,9 @@ import java.net.MalformedURLException; * expressions, it only accepts literal suffixes. I.e. a suffix "+*.jpg" is most * probably wrong, you should use "+.jpg" instead. * </p> + * + * <section> + * <h2>Examples</h2> * <h3>Example 1</h3> * <p> * The configuration shown below will accept all URLs with '.html' or '.htm' @@ -96,7 +99,7 @@ import java.net.MalformedURLException; * .htm * </pre> * - * <h4>Example 2</h4> + * <h3>Example 2</h3> * <p> * The configuration shown below will accept all URLs except common graphical * formats. @@ -115,8 +118,7 @@ import java.net.MalformedURLException; * .jpeg * .bmp * </pre> - * - * @author Andrzej Bialecki + * </section> */ public class SuffixURLFilter implements URLFilter {