This is an automated email from the ASF dual-hosted git repository. snagel pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push: new 0fae6b59f NUTCH-2997 Add Override annotations 0fae6b59f is described below commit 0fae6b59fd85f2ec894a28089c1d086b2604660a Author: Sebastian Nagel <sna...@apache.org> AuthorDate: Mon Aug 14 16:08:58 2023 +0200 NUTCH-2997 Add Override annotations --- src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java | 8 ++++++++ src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java | 1 + src/java/org/apache/nutch/crawl/CrawlDatum.java | 8 ++++++++ src/java/org/apache/nutch/crawl/CrawlDbReducer.java | 1 + src/java/org/apache/nutch/crawl/Generator.java | 5 +++++ src/java/org/apache/nutch/crawl/Inlink.java | 5 +++++ src/java/org/apache/nutch/crawl/Inlinks.java | 3 +++ src/java/org/apache/nutch/crawl/LinkDbReader.java | 1 + src/java/org/apache/nutch/crawl/MD5Signature.java | 1 + src/java/org/apache/nutch/crawl/MimeAdaptiveFetchSchedule.java | 1 + src/java/org/apache/nutch/crawl/Signature.java | 2 ++ src/java/org/apache/nutch/crawl/SignatureComparator.java | 1 + src/java/org/apache/nutch/crawl/TextMD5Signature.java | 1 + src/java/org/apache/nutch/crawl/TextProfileSignature.java | 3 +++ src/java/org/apache/nutch/crawl/URLPartitioner.java | 1 + src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java | 2 ++ src/java/org/apache/nutch/fetcher/FetcherThread.java | 1 + src/java/org/apache/nutch/fetcher/QueueFeeder.java | 1 + src/java/org/apache/nutch/hostdb/ResolverThread.java | 1 + src/java/org/apache/nutch/indexer/IndexerOutputFormat.java | 2 ++ src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java | 1 + src/java/org/apache/nutch/indexer/NutchDocument.java | 4 ++++ src/java/org/apache/nutch/indexer/NutchIndexAction.java | 2 ++ src/java/org/apache/nutch/metadata/MetaWrapper.java | 2 ++ src/java/org/apache/nutch/metadata/Metadata.java | 3 +++ src/java/org/apache/nutch/net/URLFilterChecker.java | 1 + src/java/org/apache/nutch/net/URLNormalizerChecker.java | 1 + src/java/org/apache/nutch/parse/HTMLMetaTags.java | 1 + src/java/org/apache/nutch/parse/Outlink.java | 4 ++++ src/java/org/apache/nutch/parse/ParseData.java | 4 ++++ src/java/org/apache/nutch/parse/ParseImpl.java | 5 +++++ src/java/org/apache/nutch/parse/ParseOutputFormat.java | 3 +++ src/java/org/apache/nutch/parse/ParseResult.java | 1 + src/java/org/apache/nutch/parse/ParseStatus.java | 7 +++++++ src/java/org/apache/nutch/parse/ParseText.java | 2 ++ src/java/org/apache/nutch/parse/ParserChecker.java | 1 + src/java/org/apache/nutch/plugin/Extension.java | 1 + src/java/org/apache/nutch/plugin/Plugin.java | 1 + src/java/org/apache/nutch/plugin/PluginClassLoader.java | 3 +++ src/java/org/apache/nutch/plugin/PluginRepository.java | 2 ++ src/java/org/apache/nutch/protocol/Content.java | 4 ++++ src/java/org/apache/nutch/protocol/ProtocolStatus.java | 4 ++++ src/java/org/apache/nutch/scoring/ScoringFilters.java | 9 +++++++++ src/java/org/apache/nutch/scoring/webgraph/LinkDatum.java | 3 +++ src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java | 4 ++++ src/java/org/apache/nutch/scoring/webgraph/Node.java | 3 +++ src/java/org/apache/nutch/segment/ContentAsTextInputFormat.java | 6 ++++++ src/java/org/apache/nutch/segment/SegmentMerger.java | 1 + src/java/org/apache/nutch/segment/SegmentPart.java | 1 + src/java/org/apache/nutch/segment/SegmentReader.java | 6 ++++++ src/java/org/apache/nutch/service/impl/ConfManagerImpl.java | 6 ++++++ src/java/org/apache/nutch/service/impl/SeedManagerImpl.java | 4 ++++ src/java/org/apache/nutch/service/resources/AdminResource.java | 1 + src/java/org/apache/nutch/tools/AbstractCommonCrawlFormat.java | 4 ++++ src/java/org/apache/nutch/tools/CommonCrawlFormat.java | 1 + src/java/org/apache/nutch/tools/CommonCrawlFormatSimple.java | 9 ++++++--- src/java/org/apache/nutch/tools/CommonCrawlFormatWARC.java | 1 + src/java/org/apache/nutch/tools/DmozParser.java | 2 ++ src/java/org/apache/nutch/tools/ResolveUrls.java | 1 + src/java/org/apache/nutch/tools/arc/ArcInputFormat.java | 1 + src/java/org/apache/nutch/tools/arc/ArcRecordReader.java | 6 ++++++ src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java | 3 +++ src/java/org/apache/nutch/util/AbstractChecker.java | 1 + src/java/org/apache/nutch/util/CommandRunner.java | 1 + src/java/org/apache/nutch/util/EncodingDetector.java | 1 + src/java/org/apache/nutch/util/GenericWritableConfigurable.java | 2 ++ src/java/org/apache/nutch/util/PrefixStringMatcher.java | 3 +++ src/java/org/apache/nutch/util/SuffixStringMatcher.java | 3 +++ src/java/org/apache/nutch/util/TrieStringMatcher.java | 1 + .../nutch/indexwriter/cloudsearch/CloudSearchIndexWriter.java | 1 + .../java/org/apache/nutch/indexwriter/csv/CSVIndexWriter.java | 1 + .../org/apache/nutch/protocol/http/api/HttpRobotRulesParser.java | 1 + .../src/java/org/apache/nutch/parse/html/DOMContentUtils.java | 1 + .../src/java/org/apache/nutch/parse/tika/DOMContentUtils.java | 1 + .../test/org/apache/nutch/parse/tika/TestEmbeddedDocuments.java | 1 + .../src/java/org/apache/nutch/protocol/foo/Handler.java | 1 + .../src/java/org/apache/nutch/protocol/ftp/Client.java | 1 + .../java/org/apache/nutch/protocol/ftp/PrintCommandListener.java | 2 ++ .../apache/nutch/protocol/htmlunit/DummyX509TrustManager.java | 3 +++ .../org/apache/nutch/protocol/http/DummyX509TrustManager.java | 3 +++ .../src/java/org/apache/nutch/protocol/http/HttpResponse.java | 5 +++++ .../nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java | 6 ++++++ .../apache/nutch/protocol/httpclient/DummyX509TrustManager.java | 3 +++ .../nutch/protocol/httpclient/HttpAuthenticationFactory.java | 2 ++ .../nutch/protocol/httpclient/HttpBasicAuthentication.java | 4 ++++ .../java/org/apache/nutch/protocol/httpclient/HttpResponse.java | 5 +++++ .../protocol/interactiveselenium/DummyX509TrustManager.java | 3 +++ .../apache/nutch/protocol/interactiveselenium/HttpResponse.java | 5 +++++ .../handlers/DefalultMultiInteractionHandler.java | 2 ++ .../handlers/DefaultClickAllAjaxLinksHandler.java | 2 ++ .../java/org/apache/nutch/protocol/okhttp/OkHttpResponse.java | 5 +++++ .../apache/nutch/protocol/selenium/DummyX509TrustManager.java | 3 +++ .../java/org/apache/nutch/protocol/selenium/HttpResponse.java | 5 +++++ .../org/apache/nutch/urlfilter/automaton/AutomatonURLFilter.java | 4 ++++ .../apache/nutch/urlfilter/automaton/TestAutomatonURLFilter.java | 1 + .../src/java/org/apache/nutch/urlfilter/fast/FastURLFilter.java | 4 ++++ .../test/org/apache/nutch/urlfilter/fast/TestFastURLFilter.java | 1 + .../apache/nutch/urlfilter/ignoreexempt/ExemptionUrlFilter.java | 1 + .../org/apache/nutch/urlfilter/prefix/TestPrefixURLFilter.java | 1 + .../java/org/apache/nutch/urlfilter/regex/RegexURLFilter.java | 4 ++++ .../org/apache/nutch/urlfilter/regex/TestRegexURLFilter.java | 1 + .../apache/nutch/net/urlnormalizer/regex/RegexURLNormalizer.java | 1 + .../nutch/net/urlnormalizer/regex/TestRegexURLNormalizer.java | 1 + src/test/org/apache/nutch/crawl/CrawlDBTestUtil.java | 8 ++++++++ src/test/org/apache/nutch/crawl/CrawlDbUpdateUtil.java | 8 ++++++++ src/test/org/apache/nutch/crawl/TestAdaptiveFetchSchedule.java | 1 + src/test/org/apache/nutch/crawl/TestGenerator.java | 1 + src/test/org/apache/nutch/plugin/HelloWorldExtension.java | 1 + src/test/org/apache/nutch/plugin/SimpleTestPlugin.java | 2 ++ 109 files changed, 293 insertions(+), 3 deletions(-) diff --git a/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java b/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java index 25570c6a6..2f05a3420 100644 --- a/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java +++ b/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java @@ -48,6 +48,7 @@ public abstract class AbstractFetchSchedule extends Configured implements super(conf); } + @Override public void setConf(Configuration conf) { super.setConf(conf); if (conf == null) @@ -70,6 +71,7 @@ public abstract class AbstractFetchSchedule extends Configured implements * @param datum * datum instance to be initialized (modified in place). */ + @Override public CrawlDatum initializeSchedule(Text url, CrawlDatum datum) { datum.setFetchTime(System.currentTimeMillis()); datum.setFetchInterval(defaultInterval); @@ -83,6 +85,7 @@ public abstract class AbstractFetchSchedule extends Configured implements * counter - extending classes should call super.setFetchSchedule() to * preserve this behavior. */ + @Override public CrawlDatum setFetchSchedule(Text url, CrawlDatum datum, long prevFetchTime, long prevModifiedTime, long fetchTime, long modifiedTime, int state) { @@ -106,6 +109,7 @@ public abstract class AbstractFetchSchedule extends Configured implements * implementations should make sure that it contains at least all * information from @see CrawlDatum. */ + @Override public CrawlDatum setPageGoneSchedule(Text url, CrawlDatum datum, long prevFetchTime, long prevModifiedTime, long fetchTime) { // no page is truly GONE ... just increase the interval by 50% @@ -143,6 +147,7 @@ public abstract class AbstractFetchSchedule extends Configured implements * implementations should make sure that it contains at least all * information from @see CrawlDatum. */ + @Override public CrawlDatum setPageRetrySchedule(Text url, CrawlDatum datum, long prevFetchTime, long prevModifiedTime, long fetchTime) { datum.setFetchTime(fetchTime + (long) SECONDS_PER_DAY * 1000); @@ -155,6 +160,7 @@ public abstract class AbstractFetchSchedule extends Configured implements * * @return the date as a long. */ + @Override public long calculateLastFetchTime(CrawlDatum datum) { if (datum.getStatus() == CrawlDatum.STATUS_DB_UNFETCHED) { return 0L; @@ -186,6 +192,7 @@ public abstract class AbstractFetchSchedule extends Configured implements * @return true, if the page should be considered for inclusion in the current * fetchlist, otherwise false. */ + @Override public boolean shouldFetch(Text url, CrawlDatum datum, long curTime) { // pages are never truly GONE - we have to check them from time to time. // pages with too long a fetchInterval are adjusted so that they fit within @@ -217,6 +224,7 @@ public abstract class AbstractFetchSchedule extends Configured implements * fetchTime to now. If false, force refetch whenever the next fetch * time is set. */ + @Override public CrawlDatum forceRefetch(Text url, CrawlDatum datum, boolean asap) { // reduce fetchInterval so that it fits within the max value if (datum.getFetchInterval() > maxInterval) diff --git a/src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java b/src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java index 006c90004..5bccd4f30 100644 --- a/src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java +++ b/src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java @@ -79,6 +79,7 @@ public class AdaptiveFetchSchedule extends AbstractFetchSchedule { private double SYNC_DELTA_RATE; + @Override public void setConf(Configuration conf) { super.setConf(conf); if (conf == null) diff --git a/src/java/org/apache/nutch/crawl/CrawlDatum.java b/src/java/org/apache/nutch/crawl/CrawlDatum.java index f32cec1ba..c76fc0f33 100644 --- a/src/java/org/apache/nutch/crawl/CrawlDatum.java +++ b/src/java/org/apache/nutch/crawl/CrawlDatum.java @@ -303,6 +303,7 @@ public class CrawlDatum implements WritableComparable<CrawlDatum>, Cloneable { return result; } + @Override public void readFields(DataInput in) throws IOException { byte version = in.readByte(); // read version if (version > CUR_VERSION) // check version @@ -362,6 +363,7 @@ public class CrawlDatum implements WritableComparable<CrawlDatum>, Cloneable { private static final int SCORE_OFFSET = 15; private static final int SIG_OFFSET = SCORE_OFFSET + 12; + @Override public void write(DataOutput out) throws IOException { out.writeByte(CUR_VERSION); // store current version out.writeByte(status); @@ -416,6 +418,7 @@ public class CrawlDatum implements WritableComparable<CrawlDatum>, Cloneable { * minus the correspoinding field of the existing {@link CrawlDatum} * is greater than 0, otherwise return -1. */ + @Override public int compareTo(CrawlDatum that) { if (that.score != this.score) return (that.score - this.score) > 0 ? 1 : -1; @@ -438,6 +441,7 @@ public class CrawlDatum implements WritableComparable<CrawlDatum>, Cloneable { super(CrawlDatum.class); } + @Override public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { float score1 = readFloat(b1, s1 + SCORE_OFFSET); float score2 = readFloat(b2, s2 + SCORE_OFFSET); @@ -479,6 +483,7 @@ public class CrawlDatum implements WritableComparable<CrawlDatum>, Cloneable { // basic methods // + @Override public String toString() { StringBuilder buf = new StringBuilder(); buf.append("Version: " + CUR_VERSION + "\n"); @@ -519,6 +524,7 @@ public class CrawlDatum implements WritableComparable<CrawlDatum>, Cloneable { return set1.equals(set2); } + @Override public boolean equals(Object o) { if (!(o instanceof CrawlDatum)) return false; @@ -535,6 +541,7 @@ public class CrawlDatum implements WritableComparable<CrawlDatum>, Cloneable { return metadataEquals(other.metaData); } + @Override public int hashCode() { int res = 0; if (signature != null) { @@ -550,6 +557,7 @@ public class CrawlDatum implements WritableComparable<CrawlDatum>, Cloneable { ^ fetchInterval ^ Float.floatToIntBits(score); } + @Override public Object clone() { try { return super.clone(); diff --git a/src/java/org/apache/nutch/crawl/CrawlDbReducer.java b/src/java/org/apache/nutch/crawl/CrawlDbReducer.java index bfc62c397..f00cfc5f8 100644 --- a/src/java/org/apache/nutch/crawl/CrawlDbReducer.java +++ b/src/java/org/apache/nutch/crawl/CrawlDbReducer.java @@ -332,6 +332,7 @@ class InlinkPriorityQueue extends PriorityQueue<CrawlDatum> { } /** Determines the ordering of objects in this priority queue. **/ + @Override protected boolean lessThan(Object arg0, Object arg1) { CrawlDatum candidate = (CrawlDatum) arg0; CrawlDatum least = (CrawlDatum) arg1; diff --git a/src/java/org/apache/nutch/crawl/Generator.java b/src/java/org/apache/nutch/crawl/Generator.java index 8e085428d..d1569e1f0 100644 --- a/src/java/org/apache/nutch/crawl/Generator.java +++ b/src/java/org/apache/nutch/crawl/Generator.java @@ -124,18 +124,21 @@ public class Generator extends NutchTool implements Tool { segnum = new IntWritable(0); } + @Override public void readFields(DataInput in) throws IOException { url.readFields(in); datum.readFields(in); segnum.readFields(in); } + @Override public void write(DataOutput out) throws IOException { url.write(out); datum.write(out); segnum.write(out); } + @Override public String toString() { return "url=" + url.toString() + ", datum=" + datum.toString() + ", segnum=" + segnum.toString(); @@ -149,6 +152,7 @@ public class Generator extends NutchTool implements Tool { private final URLPartitioner partitioner = new URLPartitioner(); /** Partition by host / domain or IP. */ + @Override public int getPartition(FloatWritable key, Writable value, int numReduceTasks) { return partitioner.getPartition(((SelectorEntry) value).url, key, @@ -560,6 +564,7 @@ public class Generator extends NutchTool implements Tool { extends FloatWritable.Comparator { /** Compares two FloatWritables decreasing. */ + @Override public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { return super.compare(b2, s2, l2, b1, s1, l1); } diff --git a/src/java/org/apache/nutch/crawl/Inlink.java b/src/java/org/apache/nutch/crawl/Inlink.java index d303882d2..6304c968b 100644 --- a/src/java/org/apache/nutch/crawl/Inlink.java +++ b/src/java/org/apache/nutch/crawl/Inlink.java @@ -37,6 +37,7 @@ public class Inlink implements Writable { this.anchor = anchor; } + @Override public void readFields(DataInput in) throws IOException { fromUrl = Text.readString(in); anchor = Text.readString(in); @@ -52,6 +53,7 @@ public class Inlink implements Writable { Text.skip(in); // skip anchor } + @Override public void write(DataOutput out) throws IOException { Text.writeString(out, fromUrl); Text.writeString(out, anchor); @@ -71,6 +73,7 @@ public class Inlink implements Writable { return anchor; } + @Override public boolean equals(Object o) { if (!(o instanceof Inlink)) return false; @@ -79,10 +82,12 @@ public class Inlink implements Writable { && this.anchor.equals(other.anchor); } + @Override public int hashCode() { return fromUrl.hashCode() ^ anchor.hashCode(); } + @Override public String toString() { return "fromUrl: " + fromUrl + " anchor: " + anchor; } diff --git a/src/java/org/apache/nutch/crawl/Inlinks.java b/src/java/org/apache/nutch/crawl/Inlinks.java index ae8413801..55163758b 100644 --- a/src/java/org/apache/nutch/crawl/Inlinks.java +++ b/src/java/org/apache/nutch/crawl/Inlinks.java @@ -53,6 +53,7 @@ public class Inlinks implements Writable { inlinks.clear(); } + @Override public void readFields(DataInput in) throws IOException { int length = in.readInt(); inlinks.clear(); @@ -61,6 +62,7 @@ public class Inlinks implements Writable { } } + @Override public void write(DataOutput out) throws IOException { out.writeInt(inlinks.size()); Iterator<Inlink> it = inlinks.iterator(); @@ -69,6 +71,7 @@ public class Inlinks implements Writable { } } + @Override public String toString() { StringBuilder buffer = new StringBuilder(); buffer.append("Inlinks:\n"); diff --git a/src/java/org/apache/nutch/crawl/LinkDbReader.java b/src/java/org/apache/nutch/crawl/LinkDbReader.java index 2bcceee8f..c307b985d 100644 --- a/src/java/org/apache/nutch/crawl/LinkDbReader.java +++ b/src/java/org/apache/nutch/crawl/LinkDbReader.java @@ -197,6 +197,7 @@ public class LinkDbReader extends AbstractChecker implements Closeable { sdf.format(end), TimingUtil.elapsedTime(start, end)); } + @Override protected int process(String line, StringBuilder output) throws Exception { Inlinks links = getInlinks(new Text(line)); diff --git a/src/java/org/apache/nutch/crawl/MD5Signature.java b/src/java/org/apache/nutch/crawl/MD5Signature.java index 3d163f3e8..e96c42207 100644 --- a/src/java/org/apache/nutch/crawl/MD5Signature.java +++ b/src/java/org/apache/nutch/crawl/MD5Signature.java @@ -29,6 +29,7 @@ import org.apache.nutch.protocol.Content; */ public class MD5Signature extends Signature { + @Override public byte[] calculate(Content content, Parse parse) { byte[] data = content.getContent(); if (data == null || (data.length == 0)) diff --git a/src/java/org/apache/nutch/crawl/MimeAdaptiveFetchSchedule.java b/src/java/org/apache/nutch/crawl/MimeAdaptiveFetchSchedule.java index 1f0301349..a181fbf0d 100644 --- a/src/java/org/apache/nutch/crawl/MimeAdaptiveFetchSchedule.java +++ b/src/java/org/apache/nutch/crawl/MimeAdaptiveFetchSchedule.java @@ -77,6 +77,7 @@ public class MimeAdaptiveFetchSchedule extends AdaptiveFetchSchedule { // Here we store the mime's and their delta's private HashMap<String, AdaptiveRate> mimeMap; + @Override public void setConf(Configuration conf) { super.setConf(conf); if (conf == null) diff --git a/src/java/org/apache/nutch/crawl/Signature.java b/src/java/org/apache/nutch/crawl/Signature.java index b44432690..65d17a49c 100644 --- a/src/java/org/apache/nutch/crawl/Signature.java +++ b/src/java/org/apache/nutch/crawl/Signature.java @@ -26,10 +26,12 @@ public abstract class Signature implements Configurable { public abstract byte[] calculate(Content content, Parse parse); + @Override public Configuration getConf() { return conf; } + @Override public void setConf(Configuration conf) { this.conf = conf; } diff --git a/src/java/org/apache/nutch/crawl/SignatureComparator.java b/src/java/org/apache/nutch/crawl/SignatureComparator.java index 36af1cad5..ecbdc437d 100644 --- a/src/java/org/apache/nutch/crawl/SignatureComparator.java +++ b/src/java/org/apache/nutch/crawl/SignatureComparator.java @@ -19,6 +19,7 @@ package org.apache.nutch.crawl; import java.util.Comparator; public class SignatureComparator implements Comparator<Object> { + @Override public int compare(Object o1, Object o2) { return _compare(o1, o2); } diff --git a/src/java/org/apache/nutch/crawl/TextMD5Signature.java b/src/java/org/apache/nutch/crawl/TextMD5Signature.java index 1fe0c6c72..33a2ac3be 100644 --- a/src/java/org/apache/nutch/crawl/TextMD5Signature.java +++ b/src/java/org/apache/nutch/crawl/TextMD5Signature.java @@ -29,6 +29,7 @@ public class TextMD5Signature extends Signature { Signature fallback = new MD5Signature(); + @Override public byte[] calculate(Content content, Parse parse) { String text = parse.getText(); diff --git a/src/java/org/apache/nutch/crawl/TextProfileSignature.java b/src/java/org/apache/nutch/crawl/TextProfileSignature.java index 049206a70..cc3e2addf 100644 --- a/src/java/org/apache/nutch/crawl/TextProfileSignature.java +++ b/src/java/org/apache/nutch/crawl/TextProfileSignature.java @@ -83,6 +83,7 @@ public class TextProfileSignature extends Signature { "db.signature.text_profile.sec_sort_lex", true); } + @Override public byte[] calculate(Content content, Parse parse) { HashMap<String, Token> tokens = new HashMap<>(); String text = null; @@ -168,6 +169,7 @@ public class TextProfileSignature extends Signature { this.val = val; } + @Override public String toString() { return val + " " + cnt; } @@ -178,6 +180,7 @@ public class TextProfileSignature extends Signature { * Sort tokens first by decreasing frequency and second in lexicographic * (Unicode) order */ + @Override public int compare(Token t1, Token t2) { int diffCnt = t2.cnt - t1.cnt; if (diffCnt == 0 && secondaryLexicographicSorting) { diff --git a/src/java/org/apache/nutch/crawl/URLPartitioner.java b/src/java/org/apache/nutch/crawl/URLPartitioner.java index d9e6c4c6d..8f0764aab 100644 --- a/src/java/org/apache/nutch/crawl/URLPartitioner.java +++ b/src/java/org/apache/nutch/crawl/URLPartitioner.java @@ -72,6 +72,7 @@ public class URLPartitioner extends Partitioner<Text, Writable> implements Confi } /** Hash by host or domain name or IP address. */ + @Override public int getPartition(Text key, Writable value, int numReduceTasks) { String urlString = key.toString(); URL url = null; diff --git a/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java b/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java index 6d27327bc..12dca9a94 100644 --- a/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java +++ b/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java @@ -96,6 +96,7 @@ public class FetcherOutputFormat extends FileOutputFormat<Text, NutchWritable> { } } + @Override public void write(Text key, NutchWritable value) throws IOException, InterruptedException { Writable w = value.get(); @@ -108,6 +109,7 @@ public class FetcherOutputFormat extends FileOutputFormat<Text, NutchWritable> { parseOut.write(key, (Parse) w); } + @Override public void close(TaskAttemptContext context) throws IOException, InterruptedException { fetchOut.close(); if (contentOut != null) { diff --git a/src/java/org/apache/nutch/fetcher/FetcherThread.java b/src/java/org/apache/nutch/fetcher/FetcherThread.java index d5fe343aa..f48f6076a 100644 --- a/src/java/org/apache/nutch/fetcher/FetcherThread.java +++ b/src/java/org/apache/nutch/fetcher/FetcherThread.java @@ -243,6 +243,7 @@ public class FetcherThread extends Thread { } } + @Override @SuppressWarnings("fallthrough") public void run() { activeThreads.incrementAndGet(); // count threads diff --git a/src/java/org/apache/nutch/fetcher/QueueFeeder.java b/src/java/org/apache/nutch/fetcher/QueueFeeder.java index e327af11d..025d02206 100644 --- a/src/java/org/apache/nutch/fetcher/QueueFeeder.java +++ b/src/java/org/apache/nutch/fetcher/QueueFeeder.java @@ -84,6 +84,7 @@ public class QueueFeeder extends Thread { return url; } + @Override public void run() { boolean hasMore = true; int cnt = 0; diff --git a/src/java/org/apache/nutch/hostdb/ResolverThread.java b/src/java/org/apache/nutch/hostdb/ResolverThread.java index cf749f41c..434e7bb31 100644 --- a/src/java/org/apache/nutch/hostdb/ResolverThread.java +++ b/src/java/org/apache/nutch/hostdb/ResolverThread.java @@ -63,6 +63,7 @@ public class ResolverThread implements Runnable { /** * */ + @Override public void run() { // Resolve the host and act appropriatly try { diff --git a/src/java/org/apache/nutch/indexer/IndexerOutputFormat.java b/src/java/org/apache/nutch/indexer/IndexerOutputFormat.java index 737c91b2b..9bb3b6fda 100644 --- a/src/java/org/apache/nutch/indexer/IndexerOutputFormat.java +++ b/src/java/org/apache/nutch/indexer/IndexerOutputFormat.java @@ -46,6 +46,7 @@ public class IndexerOutputFormat return new RecordWriter<Text, NutchIndexAction>() { + @Override public void close(TaskAttemptContext context) throws IOException { // do the commits once and for all the reducers in one go boolean noCommit = conf @@ -56,6 +57,7 @@ public class IndexerOutputFormat writers.close(); } + @Override public void write(Text key, NutchIndexAction indexAction) throws IOException { if (indexAction.action == NutchIndexAction.ADD) { diff --git a/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java b/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java index 84d9f6ded..3aa7a05cb 100644 --- a/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java +++ b/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java @@ -140,6 +140,7 @@ public class IndexingFiltersChecker extends AbstractChecker { } } + @Override protected int process(String url, StringBuilder output) throws Exception { if (normalizers != null) { url = normalizers.normalize(url, URLNormalizers.SCOPE_DEFAULT); diff --git a/src/java/org/apache/nutch/indexer/NutchDocument.java b/src/java/org/apache/nutch/indexer/NutchDocument.java index bbe622195..0f95741be 100644 --- a/src/java/org/apache/nutch/indexer/NutchDocument.java +++ b/src/java/org/apache/nutch/indexer/NutchDocument.java @@ -83,6 +83,7 @@ public class NutchDocument implements Writable, } /** Iterate over all fields. */ + @Override public Iterator<Entry<String, NutchField>> iterator() { return fields.entrySet().iterator(); } @@ -99,6 +100,7 @@ public class NutchDocument implements Writable, return documentMeta; } + @Override public void readFields(DataInput in) throws IOException { fields.clear(); byte version = in.readByte(); @@ -116,6 +118,7 @@ public class NutchDocument implements Writable, documentMeta.readFields(in); } + @Override public void write(DataOutput out) throws IOException { out.writeByte(VERSION); WritableUtils.writeVInt(out, fields.size()); @@ -128,6 +131,7 @@ public class NutchDocument implements Writable, documentMeta.write(out); } + @Override public String toString() { StringBuilder sb = new StringBuilder(); sb.append("doc {\n"); diff --git a/src/java/org/apache/nutch/indexer/NutchIndexAction.java b/src/java/org/apache/nutch/indexer/NutchIndexAction.java index b2517c3ad..80016b7aa 100644 --- a/src/java/org/apache/nutch/indexer/NutchIndexAction.java +++ b/src/java/org/apache/nutch/indexer/NutchIndexAction.java @@ -45,12 +45,14 @@ public class NutchIndexAction implements Writable { this.action = action; } + @Override public void readFields(DataInput in) throws IOException { action = in.readByte(); doc = new NutchDocument(); doc.readFields(in); } + @Override public void write(DataOutput out) throws IOException { out.write(action); doc.write(out); diff --git a/src/java/org/apache/nutch/metadata/MetaWrapper.java b/src/java/org/apache/nutch/metadata/MetaWrapper.java index 254773493..f78e957e5 100644 --- a/src/java/org/apache/nutch/metadata/MetaWrapper.java +++ b/src/java/org/apache/nutch/metadata/MetaWrapper.java @@ -102,12 +102,14 @@ public class MetaWrapper extends NutchWritable { return metadata.getValues(name); } + @Override public void readFields(DataInput in) throws IOException { super.readFields(in); metadata = new Metadata(); metadata.readFields(in); } + @Override public void write(DataOutput out) throws IOException { super.write(out); metadata.write(out); diff --git a/src/java/org/apache/nutch/metadata/Metadata.java b/src/java/org/apache/nutch/metadata/Metadata.java index 8a3619595..5c37911fb 100644 --- a/src/java/org/apache/nutch/metadata/Metadata.java +++ b/src/java/org/apache/nutch/metadata/Metadata.java @@ -199,6 +199,7 @@ public class Metadata implements Writable, CreativeCommons, DublinCore, metadata.clear(); } + @Override public boolean equals(Object o) { if (o == null) { @@ -259,6 +260,7 @@ public class Metadata implements Writable, CreativeCommons, DublinCore, return buf.toString(); } + @Override public final void write(DataOutput out) throws IOException { out.writeInt(size()); String[] values = null; @@ -280,6 +282,7 @@ public class Metadata implements Writable, CreativeCommons, DublinCore, } } + @Override public final void readFields(DataInput in) throws IOException { int keySize = in.readInt(); String key; diff --git a/src/java/org/apache/nutch/net/URLFilterChecker.java b/src/java/org/apache/nutch/net/URLFilterChecker.java index 4e613d062..7916cc579 100644 --- a/src/java/org/apache/nutch/net/URLFilterChecker.java +++ b/src/java/org/apache/nutch/net/URLFilterChecker.java @@ -69,6 +69,7 @@ public class URLFilterChecker extends AbstractChecker { return super.run(); } + @Override protected int process(String line, StringBuilder output) throws Exception { String out = filters.filter(line); if (out != null) { diff --git a/src/java/org/apache/nutch/net/URLNormalizerChecker.java b/src/java/org/apache/nutch/net/URLNormalizerChecker.java index fa0baa289..586c7b246 100644 --- a/src/java/org/apache/nutch/net/URLNormalizerChecker.java +++ b/src/java/org/apache/nutch/net/URLNormalizerChecker.java @@ -74,6 +74,7 @@ public class URLNormalizerChecker extends AbstractChecker { return super.run(); } + @Override protected int process(String line, StringBuilder output) throws Exception { try { String norm = normalizers.normalize(line, scope); diff --git a/src/java/org/apache/nutch/parse/HTMLMetaTags.java b/src/java/org/apache/nutch/parse/HTMLMetaTags.java index 4e20a5fda..ab4adb40a 100644 --- a/src/java/org/apache/nutch/parse/HTMLMetaTags.java +++ b/src/java/org/apache/nutch/parse/HTMLMetaTags.java @@ -208,6 +208,7 @@ public class HTMLMetaTags { return httpEquivTags; } + @Override public String toString() { StringBuffer sb = new StringBuffer(); sb.append("base=" + baseHref + ", noCache=" + noCache + ", noFollow=" diff --git a/src/java/org/apache/nutch/parse/Outlink.java b/src/java/org/apache/nutch/parse/Outlink.java index 6261346a2..0db7cdce2 100644 --- a/src/java/org/apache/nutch/parse/Outlink.java +++ b/src/java/org/apache/nutch/parse/Outlink.java @@ -44,6 +44,7 @@ public class Outlink implements Writable { md = null; } + @Override public void readFields(DataInput in) throws IOException { toUrl = Text.readString(in); anchor = Text.readString(in); @@ -72,6 +73,7 @@ public class Outlink implements Writable { } } + @Override public void write(DataOutput out) throws IOException { Text.writeString(out, toUrl); Text.writeString(out, anchor); @@ -109,6 +111,7 @@ public class Outlink implements Writable { this.md = md; } + @Override public boolean equals(Object o) { if (!(o instanceof Outlink)) return false; @@ -116,6 +119,7 @@ public class Outlink implements Writable { return this.toUrl.equals(other.toUrl) && this.anchor.equals(other.anchor); } + @Override public String toString() { StringBuffer repr = new StringBuffer("toUrl: "); repr.append(toUrl); diff --git a/src/java/org/apache/nutch/parse/ParseData.java b/src/java/org/apache/nutch/parse/ParseData.java index a34d8def0..31a720f44 100644 --- a/src/java/org/apache/nutch/parse/ParseData.java +++ b/src/java/org/apache/nutch/parse/ParseData.java @@ -137,10 +137,12 @@ public final class ParseData extends VersionedWritable { return value; } + @Override public byte getVersion() { return version; } + @Override public final void readFields(DataInput in) throws IOException { version = in.readByte(); @@ -162,6 +164,7 @@ public final class ParseData extends VersionedWritable { parseMeta.readFields(in); } + @Override public final void write(DataOutput out) throws IOException { out.writeByte(VERSION); // write version status.write(out); // write status @@ -181,6 +184,7 @@ public final class ParseData extends VersionedWritable { return parseText; } + @Override public boolean equals(Object o) { if (!(o instanceof ParseData)) return false; diff --git a/src/java/org/apache/nutch/parse/ParseImpl.java b/src/java/org/apache/nutch/parse/ParseImpl.java index 9eb3272ec..ef4dd167b 100644 --- a/src/java/org/apache/nutch/parse/ParseImpl.java +++ b/src/java/org/apache/nutch/parse/ParseImpl.java @@ -53,24 +53,29 @@ public class ParseImpl implements Parse, Writable { this.isCanonical = isCanonical; } + @Override public String getText() { return text.getText(); } + @Override public ParseData getData() { return data; } + @Override public boolean isCanonical() { return isCanonical; } + @Override public final void write(DataOutput out) throws IOException { out.writeBoolean(isCanonical); text.write(out); data.write(out); } + @Override public void readFields(DataInput in) throws IOException { isCanonical = in.readBoolean(); text = new ParseText(); diff --git a/src/java/org/apache/nutch/parse/ParseOutputFormat.java b/src/java/org/apache/nutch/parse/ParseOutputFormat.java index d47043c17..27ece06c1 100644 --- a/src/java/org/apache/nutch/parse/ParseOutputFormat.java +++ b/src/java/org/apache/nutch/parse/ParseOutputFormat.java @@ -83,14 +83,17 @@ public class ParseOutputFormat extends OutputFormat<Text, Parse> { this.value = value; } + @Override public Text getKey() { return key; } + @Override public CrawlDatum getValue() { return value; } + @Override public CrawlDatum setValue(CrawlDatum value) { this.value = value; return this.value; diff --git a/src/java/org/apache/nutch/parse/ParseResult.java b/src/java/org/apache/nutch/parse/ParseResult.java index 1ea0abb17..30370516b 100644 --- a/src/java/org/apache/nutch/parse/ParseResult.java +++ b/src/java/org/apache/nutch/parse/ParseResult.java @@ -144,6 +144,7 @@ public class ParseResult implements Iterable<Map.Entry<Text, Parse>> { /** * Iterate over all entries in the <url, Parse> map. */ + @Override public Iterator<Entry<Text, Parse>> iterator() { return parseMap.entrySet().iterator(); } diff --git a/src/java/org/apache/nutch/parse/ParseStatus.java b/src/java/org/apache/nutch/parse/ParseStatus.java index feaef47ff..052a34224 100644 --- a/src/java/org/apache/nutch/parse/ParseStatus.java +++ b/src/java/org/apache/nutch/parse/ParseStatus.java @@ -150,6 +150,7 @@ public class ParseStatus implements Writable { return res; } + @Override public void readFields(DataInput in) throws IOException { byte version = in.readByte(); switch (version) { @@ -168,6 +169,7 @@ public class ParseStatus implements Writable { } } + @Override public void write(DataOutput out) throws IOException { out.writeByte(VERSION); out.writeByte(majorCode); @@ -228,6 +230,7 @@ public class ParseStatus implements Writable { return ParseResult.createParseResult(url, getEmptyParse(conf)); } + @Override public String toString() { StringBuffer res = new StringBuffer(); String name = null; @@ -268,6 +271,7 @@ public class ParseStatus implements Writable { this.minorCode = minorCode; } + @Override public boolean equals(Object o) { if (o == null) return false; @@ -306,14 +310,17 @@ public class ParseStatus implements Writable { new Metadata()); } + @Override public ParseData getData() { return data; } + @Override public String getText() { return ""; } + @Override public boolean isCanonical() { return true; } diff --git a/src/java/org/apache/nutch/parse/ParseText.java b/src/java/org/apache/nutch/parse/ParseText.java index b93bc4761..18d0f6be7 100644 --- a/src/java/org/apache/nutch/parse/ParseText.java +++ b/src/java/org/apache/nutch/parse/ParseText.java @@ -51,6 +51,7 @@ public final class ParseText implements Writable { this.text = text; } + @Override public void readFields(DataInput in) throws IOException { byte version = in.readByte(); switch (version) { @@ -65,6 +66,7 @@ public final class ParseText implements Writable { } } + @Override public final void write(DataOutput out) throws IOException { out.write(VERSION); Text.writeString(out, text); diff --git a/src/java/org/apache/nutch/parse/ParserChecker.java b/src/java/org/apache/nutch/parse/ParserChecker.java index 5da023fdc..1533ab57c 100644 --- a/src/java/org/apache/nutch/parse/ParserChecker.java +++ b/src/java/org/apache/nutch/parse/ParserChecker.java @@ -154,6 +154,7 @@ public class ParserChecker extends AbstractChecker { } } + @Override protected int process(String url, StringBuilder output) throws Exception { if (this.normalizers != null) { url = this.normalizers.normalize(url, URLNormalizers.SCOPE_DEFAULT); diff --git a/src/java/org/apache/nutch/plugin/Extension.java b/src/java/org/apache/nutch/plugin/Extension.java index e949ea317..cdc8533c0 100644 --- a/src/java/org/apache/nutch/plugin/Extension.java +++ b/src/java/org/apache/nutch/plugin/Extension.java @@ -206,6 +206,7 @@ public class Extension { fDescriptor = pDescriptor; } + @Override public String toString() { return getId() + ", " + getClazz() + ", " + getTargetPoint(); } diff --git a/src/java/org/apache/nutch/plugin/Plugin.java b/src/java/org/apache/nutch/plugin/Plugin.java index 306ada3d4..b2e717d20 100644 --- a/src/java/org/apache/nutch/plugin/Plugin.java +++ b/src/java/org/apache/nutch/plugin/Plugin.java @@ -89,6 +89,7 @@ public class Plugin { fDescriptor = descriptor; } + @Override @SuppressWarnings("deprecation") protected void finalize() throws Throwable { super.finalize(); diff --git a/src/java/org/apache/nutch/plugin/PluginClassLoader.java b/src/java/org/apache/nutch/plugin/PluginClassLoader.java index 02422f78a..a1d180ff0 100644 --- a/src/java/org/apache/nutch/plugin/PluginClassLoader.java +++ b/src/java/org/apache/nutch/plugin/PluginClassLoader.java @@ -159,16 +159,19 @@ public class PluginClassLoader extends URLClassLoader { return new Enumeration<URL>() { Iterator<URL> iter = urls.iterator(); + @Override public boolean hasMoreElements() { return iter.hasNext(); } + @Override public URL nextElement() { return iter.next(); } }; } + @Override public InputStream getResourceAsStream(String name) { URL url = getResource(name); try { diff --git a/src/java/org/apache/nutch/plugin/PluginRepository.java b/src/java/org/apache/nutch/plugin/PluginRepository.java index d80f971df..fef984bf3 100644 --- a/src/java/org/apache/nutch/plugin/PluginRepository.java +++ b/src/java/org/apache/nutch/plugin/PluginRepository.java @@ -320,6 +320,7 @@ public class PluginRepository implements URLStreamHandlerFactory { * @see java.lang.Object#finalize() * @deprecated */ + @Override @Deprecated public void finalize() throws Throwable { shutDownActivatedPlugins(); @@ -577,6 +578,7 @@ public class PluginRepository implements URLStreamHandlerFactory { * @see java.net.URL * @see <a href="https://issues.apache.org/jira/browse/NUTCH-2429">NUTCH-2429</a> */ + @Override public URLStreamHandler createURLStreamHandler(String protocol) { LOG.debug("Creating URLStreamHandler for protocol: {}", protocol); diff --git a/src/java/org/apache/nutch/protocol/Content.java b/src/java/org/apache/nutch/protocol/Content.java index 883e0fb26..edd117f3c 100644 --- a/src/java/org/apache/nutch/protocol/Content.java +++ b/src/java/org/apache/nutch/protocol/Content.java @@ -147,6 +147,7 @@ public final class Content implements Writable { } + @Override public final void readFields(DataInput in) throws IOException { metadata.clear(); int sizeOrVersion = in.readInt(); @@ -176,6 +177,7 @@ public final class Content implements Writable { } } + @Override public final void write(DataOutput out) throws IOException { out.writeInt(VERSION); @@ -256,6 +258,7 @@ public final class Content implements Writable { this.metadata = metadata; } + @Override public boolean equals(Object o) { if (!(o instanceof Content)) { return false; @@ -267,6 +270,7 @@ public final class Content implements Writable { && this.metadata.equals(that.metadata); } + @Override public String toString() { return toString(StandardCharsets.UTF_8); } diff --git a/src/java/org/apache/nutch/protocol/ProtocolStatus.java b/src/java/org/apache/nutch/protocol/ProtocolStatus.java index d9e7e3dc6..1659fda40 100644 --- a/src/java/org/apache/nutch/protocol/ProtocolStatus.java +++ b/src/java/org/apache/nutch/protocol/ProtocolStatus.java @@ -164,6 +164,7 @@ public class ProtocolStatus implements Writable { return res; } + @Override public void readFields(DataInput in) throws IOException { byte version = in.readByte(); switch (version) { @@ -182,6 +183,7 @@ public class ProtocolStatus implements Writable { } } + @Override public void write(DataOutput out) throws IOException { out.writeByte(VERSION); out.writeByte((byte) code); @@ -253,6 +255,7 @@ public class ProtocolStatus implements Writable { this.lastModified = lastModified; } + @Override public boolean equals(Object o) { if (o == null) return false; @@ -279,6 +282,7 @@ public class ProtocolStatus implements Writable { return true; } + @Override public String toString() { StringBuffer res = new StringBuffer(); res.append(codeToName.get(Integer.valueOf(code)) + "(" + code diff --git a/src/java/org/apache/nutch/scoring/ScoringFilters.java b/src/java/org/apache/nutch/scoring/ScoringFilters.java index 6c3835564..b5d36e8e5 100644 --- a/src/java/org/apache/nutch/scoring/ScoringFilters.java +++ b/src/java/org/apache/nutch/scoring/ScoringFilters.java @@ -48,6 +48,7 @@ public class ScoringFilters extends Configured implements ScoringFilter { } /** Calculate a sort value for Generate. */ + @Override public float generatorSortValue(Text url, CrawlDatum datum, float initSort) throws ScoringFilterException { for (int i = 0; i < this.filters.length; i++) { @@ -57,6 +58,7 @@ public class ScoringFilters extends Configured implements ScoringFilter { } /** Calculate a new initial score, used when adding newly discovered pages. */ + @Override public void initialScore(Text url, CrawlDatum datum) throws ScoringFilterException { for (int i = 0; i < this.filters.length; i++) { @@ -65,6 +67,7 @@ public class ScoringFilters extends Configured implements ScoringFilter { } /** Calculate a new initial score, used when injecting new pages. */ + @Override public void injectedScore(Text url, CrawlDatum datum) throws ScoringFilterException { for (int i = 0; i < this.filters.length; i++) { @@ -73,6 +76,7 @@ public class ScoringFilters extends Configured implements ScoringFilter { } /** Calculate updated page score during CrawlDb.update(). */ + @Override public void updateDbScore(Text url, CrawlDatum old, CrawlDatum datum, List<CrawlDatum> inlinked) throws ScoringFilterException { for (int i = 0; i < this.filters.length; i++) { @@ -81,6 +85,7 @@ public class ScoringFilters extends Configured implements ScoringFilter { } /** Calculate orphaned page score during CrawlDb.update(). */ + @Override public void orphanedScore(Text url, CrawlDatum datum) throws ScoringFilterException { for (int i = 0; i < this.filters.length; i++) { @@ -88,6 +93,7 @@ public class ScoringFilters extends Configured implements ScoringFilter { } } + @Override public void passScoreBeforeParsing(Text url, CrawlDatum datum, Content content) throws ScoringFilterException { for (int i = 0; i < this.filters.length; i++) { @@ -95,6 +101,7 @@ public class ScoringFilters extends Configured implements ScoringFilter { } } + @Override public void passScoreAfterParsing(Text url, Content content, Parse parse) throws ScoringFilterException { for (int i = 0; i < this.filters.length; i++) { @@ -102,6 +109,7 @@ public class ScoringFilters extends Configured implements ScoringFilter { } } + @Override public CrawlDatum distributeScoreToOutlinks(Text fromUrl, ParseData parseData, Collection<Entry<Text, CrawlDatum>> targets, CrawlDatum adjust, int allCount) throws ScoringFilterException { @@ -112,6 +120,7 @@ public class ScoringFilters extends Configured implements ScoringFilter { return adjust; } + @Override public float indexerScore(Text url, NutchDocument doc, CrawlDatum dbDatum, CrawlDatum fetchDatum, Parse parse, Inlinks inlinks, float initScore) throws ScoringFilterException { diff --git a/src/java/org/apache/nutch/scoring/webgraph/LinkDatum.java b/src/java/org/apache/nutch/scoring/webgraph/LinkDatum.java index 67c936612..db98c69a4 100644 --- a/src/java/org/apache/nutch/scoring/webgraph/LinkDatum.java +++ b/src/java/org/apache/nutch/scoring/webgraph/LinkDatum.java @@ -114,6 +114,7 @@ public class LinkDatum implements Writable { this.linkType = linkType; } + @Override public void readFields(DataInput in) throws IOException { url = Text.readString(in); anchor = Text.readString(in); @@ -122,6 +123,7 @@ public class LinkDatum implements Writable { linkType = in.readByte(); } + @Override public void write(DataOutput out) throws IOException { Text.writeString(out, url); Text.writeString(out, anchor != null ? anchor : ""); @@ -130,6 +132,7 @@ public class LinkDatum implements Writable { out.writeByte(linkType); } + @Override public String toString() { String type = (linkType == INLINK ? "inlink" diff --git a/src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java b/src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java index 6e2c62938..54cd8b8ed 100644 --- a/src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java +++ b/src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java @@ -144,12 +144,14 @@ public class LinkDumper extends Configured implements Tool { this.node = node; } + @Override public void readFields(DataInput in) throws IOException { url = in.readUTF(); node = new Node(); node.readFields(in); } + @Override public void write(DataOutput out) throws IOException { out.writeUTF(url); node.write(out); @@ -180,6 +182,7 @@ public class LinkDumper extends Configured implements Tool { this.links = links; } + @Override public void readFields(DataInput in) throws IOException { int numLinks = in.readInt(); if (numLinks > 0) { @@ -192,6 +195,7 @@ public class LinkDumper extends Configured implements Tool { } } + @Override public void write(DataOutput out) throws IOException { if (links != null && links.length > 0) { int numLinks = links.length; diff --git a/src/java/org/apache/nutch/scoring/webgraph/Node.java b/src/java/org/apache/nutch/scoring/webgraph/Node.java index a35e84288..d95ca9fc5 100644 --- a/src/java/org/apache/nutch/scoring/webgraph/Node.java +++ b/src/java/org/apache/nutch/scoring/webgraph/Node.java @@ -76,6 +76,7 @@ public class Node implements Writable { this.metadata = metadata; } + @Override public void readFields(DataInput in) throws IOException { numInlinks = in.readInt(); @@ -85,6 +86,7 @@ public class Node implements Writable { metadata.readFields(in); } + @Override public void write(DataOutput out) throws IOException { out.writeInt(numInlinks); @@ -93,6 +95,7 @@ public class Node implements Writable { metadata.write(out); } + @Override public String toString() { return "num inlinks: " + numInlinks + ", num outlinks: " + numOutlinks + ", inlink score: " + inlinkScore + ", outlink score: " diff --git a/src/java/org/apache/nutch/segment/ContentAsTextInputFormat.java b/src/java/org/apache/nutch/segment/ContentAsTextInputFormat.java index 6d75cdefd..e14b9dd2b 100644 --- a/src/java/org/apache/nutch/segment/ContentAsTextInputFormat.java +++ b/src/java/org/apache/nutch/segment/ContentAsTextInputFormat.java @@ -53,18 +53,22 @@ public class ContentAsTextInputFormat extends innerValue = new Content(); } + @Override public Text getCurrentValue(){ return new Text(); } + @Override public Text getCurrentKey(){ return new Text(); } + @Override public boolean nextKeyValue(){ return false; } + @Override public void initialize(InputSplit split, TaskAttemptContext context){ } @@ -88,6 +92,7 @@ public class ContentAsTextInputFormat extends return true; } + @Override public float getProgress() throws IOException { return sequenceFileRecordReader.getProgress(); } @@ -96,6 +101,7 @@ public class ContentAsTextInputFormat extends return sequenceFileRecordReader.getPos(); }*/ + @Override public synchronized void close() throws IOException { sequenceFileRecordReader.close(); } diff --git a/src/java/org/apache/nutch/segment/SegmentMerger.java b/src/java/org/apache/nutch/segment/SegmentMerger.java index 6bb90e472..c884dfedf 100644 --- a/src/java/org/apache/nutch/segment/SegmentMerger.java +++ b/src/java/org/apache/nutch/segment/SegmentMerger.java @@ -227,6 +227,7 @@ public class SegmentMerger extends Configured implements Tool{ HashMap<String, Closeable> sliceWriters = new HashMap<>(); String segmentName = conf.get("segment.merger.segmentName"); + @Override public void write(Text key, MetaWrapper wrapper) throws IOException { // unwrap SegmentPart sp = SegmentPart.parse(wrapper.getMeta(SEGMENT_PART_KEY)); diff --git a/src/java/org/apache/nutch/segment/SegmentPart.java b/src/java/org/apache/nutch/segment/SegmentPart.java index 6769149a4..34f76fdb0 100644 --- a/src/java/org/apache/nutch/segment/SegmentPart.java +++ b/src/java/org/apache/nutch/segment/SegmentPart.java @@ -44,6 +44,7 @@ public class SegmentPart { * Return a String representation of this class, in the form * "segmentName/partName". */ + @Override public String toString() { return segmentName + "/" + partName; } diff --git a/src/java/org/apache/nutch/segment/SegmentReader.java b/src/java/org/apache/nutch/segment/SegmentReader.java index 877aeb6f8..14546af54 100644 --- a/src/java/org/apache/nutch/segment/SegmentReader.java +++ b/src/java/org/apache/nutch/segment/SegmentReader.java @@ -307,6 +307,7 @@ public class SegmentReader extends Configured implements Tool { ArrayList<Thread> threads = new ArrayList<>(); if (co) threads.add(new Thread() { + @Override public void run() { try { List<Writable> res = getMapRecords(new Path(segment, @@ -319,6 +320,7 @@ public class SegmentReader extends Configured implements Tool { }); if (fe) threads.add(new Thread() { + @Override public void run() { try { List<Writable> res = getMapRecords(new Path(segment, @@ -331,6 +333,7 @@ public class SegmentReader extends Configured implements Tool { }); if (ge) threads.add(new Thread() { + @Override public void run() { try { List<Writable> res = getSeqRecords(new Path(segment, @@ -343,6 +346,7 @@ public class SegmentReader extends Configured implements Tool { }); if (pa) threads.add(new Thread() { + @Override public void run() { try { List<Writable> res = getSeqRecords(new Path(segment, @@ -355,6 +359,7 @@ public class SegmentReader extends Configured implements Tool { }); if (pd) threads.add(new Thread() { + @Override public void run() { try { List<Writable> res = getMapRecords(new Path(segment, @@ -367,6 +372,7 @@ public class SegmentReader extends Configured implements Tool { }); if (pt) threads.add(new Thread() { + @Override public void run() { try { List<Writable> res = getMapRecords(new Path(segment, diff --git a/src/java/org/apache/nutch/service/impl/ConfManagerImpl.java b/src/java/org/apache/nutch/service/impl/ConfManagerImpl.java index 784d098c5..7afe030f8 100644 --- a/src/java/org/apache/nutch/service/impl/ConfManagerImpl.java +++ b/src/java/org/apache/nutch/service/impl/ConfManagerImpl.java @@ -47,6 +47,7 @@ public class ConfManagerImpl implements ConfManager { /** * Returns the configuration associatedConfManagerImpl with the given confId */ + @Override public Configuration get(String confId) { if (confId == null) { return configurations.get(ConfigResource.DEFAULT); @@ -54,6 +55,7 @@ public class ConfManagerImpl implements ConfManager { return configurations.get(confId); } + @Override public Map<String, String> getAsMap(String confId) { Configuration configuration = configurations.get(confId); if (configuration == null) { @@ -72,6 +74,7 @@ public class ConfManagerImpl implements ConfManager { /** * Sets the given property in the configuration associated with the confId */ + @Override public void setProperty(String confId, String propName, String propValue) { if (!configurations.containsKey(confId)) { throw new IllegalArgumentException("Unknown configId '" + confId + "'"); @@ -80,6 +83,7 @@ public class ConfManagerImpl implements ConfManager { conf.set(propName, propValue); } + @Override public Set<String> list() { return configurations.keySet(); } @@ -89,6 +93,7 @@ public class ConfManagerImpl implements ConfManager { * @param nutchConfig crawler configuration * @return String - confId */ + @Override public String create(NutchConfig nutchConfig) { if (StringUtils.isBlank(nutchConfig.getConfigId())) { nutchConfig.setConfigId(String.valueOf(newConfigId.incrementAndGet())); @@ -103,6 +108,7 @@ public class ConfManagerImpl implements ConfManager { } + @Override public void delete(String confId) { configurations.remove(confId); } diff --git a/src/java/org/apache/nutch/service/impl/SeedManagerImpl.java b/src/java/org/apache/nutch/service/impl/SeedManagerImpl.java index 46d1bba1b..a28de943c 100644 --- a/src/java/org/apache/nutch/service/impl/SeedManagerImpl.java +++ b/src/java/org/apache/nutch/service/impl/SeedManagerImpl.java @@ -30,6 +30,7 @@ public class SeedManagerImpl implements SeedManager { seeds = new HashMap<>(); } + @Override public SeedList getSeedList(String seedName) { if(seeds.containsKey(seedName)) { return seeds.get(seedName); @@ -38,14 +39,17 @@ public class SeedManagerImpl implements SeedManager { return null; } + @Override public void setSeedList(String seedName, SeedList seedList) { seeds.put(seedName, seedList); } + @Override public Map<String, SeedList> getSeeds(){ return seeds; } + @Override public boolean deleteSeedList(String seedName) { if(seeds.containsKey(seedName)) { seeds.remove(seedName); diff --git a/src/java/org/apache/nutch/service/resources/AdminResource.java b/src/java/org/apache/nutch/service/resources/AdminResource.java index cc03d3356..03832628a 100644 --- a/src/java/org/apache/nutch/service/resources/AdminResource.java +++ b/src/java/org/apache/nutch/service/resources/AdminResource.java @@ -68,6 +68,7 @@ public class AdminResource extends AbstractResource{ private void scheduleServerStop() { LOG.info("Shutting down server in {} sec", DELAY_SEC); Thread thread = new Thread() { + @Override public void run() { try { Thread.sleep(DELAY_SEC*1000); diff --git a/src/java/org/apache/nutch/tools/AbstractCommonCrawlFormat.java b/src/java/org/apache/nutch/tools/AbstractCommonCrawlFormat.java index 1dde47808..7ba945863 100644 --- a/src/java/org/apache/nutch/tools/AbstractCommonCrawlFormat.java +++ b/src/java/org/apache/nutch/tools/AbstractCommonCrawlFormat.java @@ -77,6 +77,7 @@ public abstract class AbstractCommonCrawlFormat implements CommonCrawlFormat { this.reverseKeyValue = config.getReverseKeyValue(); } + @Override public String getJsonData(String url, Content content, Metadata metadata) throws IOException { this.url = url; @@ -86,6 +87,7 @@ public abstract class AbstractCommonCrawlFormat implements CommonCrawlFormat { return this.getJsonData(); } + @Override public String getJsonData(String url, Content content, Metadata metadata, ParseData parseData) throws IOException { @@ -298,10 +300,12 @@ public abstract class AbstractCommonCrawlFormat implements CommonCrawlFormat { return ifNullString(metadata.get("Content-Type")); } + @Override public List<String> getInLinks() { return inLinks; } + @Override public void setInLinks(List<String> inLinks) { this.inLinks = inLinks; } diff --git a/src/java/org/apache/nutch/tools/CommonCrawlFormat.java b/src/java/org/apache/nutch/tools/CommonCrawlFormat.java index 652196649..f73a84dc2 100644 --- a/src/java/org/apache/nutch/tools/CommonCrawlFormat.java +++ b/src/java/org/apache/nutch/tools/CommonCrawlFormat.java @@ -86,5 +86,6 @@ public interface CommonCrawlFormat extends Closeable { * Optional method that could be implemented if the actual format needs some * close procedure. */ + @Override public abstract void close(); } diff --git a/src/java/org/apache/nutch/tools/CommonCrawlFormatSimple.java b/src/java/org/apache/nutch/tools/CommonCrawlFormatSimple.java index 431074996..c0cf64b6a 100644 --- a/src/java/org/apache/nutch/tools/CommonCrawlFormatSimple.java +++ b/src/java/org/apache/nutch/tools/CommonCrawlFormatSimple.java @@ -80,7 +80,8 @@ public class CommonCrawlFormatSimple extends AbstractCommonCrawlFormat { sb.append("\"" + value + "\","); } - protected void startObject(String key) throws IOException { + @Override + protected void startObject(String key) throws IOException { String name = ""; if (key != null) { name = "\"" + key + "\": "; @@ -89,7 +90,8 @@ public class CommonCrawlFormatSimple extends AbstractCommonCrawlFormat { this.tabCount++; } - protected void closeObject(String key) throws IOException { + @Override + protected void closeObject(String key) throws IOException { if (sb.charAt(sb.length()-2) == ',') { sb.deleteCharAt(sb.length()-2); // delete comma } @@ -97,7 +99,8 @@ public class CommonCrawlFormatSimple extends AbstractCommonCrawlFormat { sb.append(printTabs() + "},\n"); } - protected String generateJson() throws IOException { + @Override + protected String generateJson() throws IOException { sb.deleteCharAt(sb.length()-1); // delete new line sb.deleteCharAt(sb.length()-1); // delete comma return sb.toString(); diff --git a/src/java/org/apache/nutch/tools/CommonCrawlFormatWARC.java b/src/java/org/apache/nutch/tools/CommonCrawlFormatWARC.java index f4010415c..9dcb72976 100644 --- a/src/java/org/apache/nutch/tools/CommonCrawlFormatWARC.java +++ b/src/java/org/apache/nutch/tools/CommonCrawlFormatWARC.java @@ -125,6 +125,7 @@ public class CommonCrawlFormatWARC extends AbstractCommonCrawlFormat { writer = new WARCWriter(SERIALNO, settings); } + @Override public String getJsonData(String url, Content content, Metadata metadata, ParseData parseData) throws IOException { this.url = url; diff --git a/src/java/org/apache/nutch/tools/DmozParser.java b/src/java/org/apache/nutch/tools/DmozParser.java index 8db481778..ac548471a 100644 --- a/src/java/org/apache/nutch/tools/DmozParser.java +++ b/src/java/org/apache/nutch/tools/DmozParser.java @@ -73,6 +73,7 @@ public class DmozParser { super(reader); } + @Override public int read() throws IOException { int c = in.read(); int value = c; @@ -89,6 +90,7 @@ public class DmozParser { return value; } + @Override public int read(char[] cbuf, int off, int len) throws IOException { int n = in.read(cbuf, off, len); if (n != -1) { diff --git a/src/java/org/apache/nutch/tools/ResolveUrls.java b/src/java/org/apache/nutch/tools/ResolveUrls.java index 92077a21b..5d84394da 100644 --- a/src/java/org/apache/nutch/tools/ResolveUrls.java +++ b/src/java/org/apache/nutch/tools/ResolveUrls.java @@ -69,6 +69,7 @@ public class ResolveUrls { this.url = url; } + @Override public void run() { numTotal.incrementAndGet(); diff --git a/src/java/org/apache/nutch/tools/arc/ArcInputFormat.java b/src/java/org/apache/nutch/tools/arc/ArcInputFormat.java index 7a3ba836e..abe985a85 100644 --- a/src/java/org/apache/nutch/tools/arc/ArcInputFormat.java +++ b/src/java/org/apache/nutch/tools/arc/ArcInputFormat.java @@ -35,6 +35,7 @@ import org.apache.hadoop.mapreduce.Mapper.Context; */ public class ArcInputFormat extends FileInputFormat<Text, BytesWritable> { + @Override public RecordReader<Text, BytesWritable> createRecordReader(InputSplit split, TaskAttemptContext context){ return new SequenceFileRecordReader<Text, BytesWritable>(); diff --git a/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java b/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java index b514a63fc..cba4694ba 100644 --- a/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java +++ b/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java @@ -122,6 +122,7 @@ public class ArcRecordReader extends RecordReader<Text, BytesWritable> { /** * Closes the record reader resources. */ + @Override public void close() throws IOException { this.in.close(); } @@ -159,6 +160,7 @@ public class ArcRecordReader extends RecordReader<Text, BytesWritable> { * * @return The percentage of progress as a float from 0 to 1. */ + @Override public float getProgress() throws IOException { // if we haven't even started @@ -170,18 +172,22 @@ public class ArcRecordReader extends RecordReader<Text, BytesWritable> { } } + @Override public BytesWritable getCurrentValue(){ return new BytesWritable(); } + @Override public Text getCurrentKey(){ return new Text(); } + @Override public boolean nextKeyValue(){ return false; } + @Override public void initialize(InputSplit split, TaskAttemptContext context){ } diff --git a/src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java b/src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java index d95ba6150..4e916dbd5 100644 --- a/src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java +++ b/src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java @@ -265,6 +265,7 @@ public class ArcSegmentCreator extends Configured implements Tool { * @param context * The task context. */ + @Override public void setup(Mapper<Text, BytesWritable, Text, NutchWritable>.Context context) { // set the url filters, scoring filters the parse util and the url // normalizers @@ -286,6 +287,7 @@ public class ArcSegmentCreator extends Configured implements Tool { * @param context * The context of the mapreduce job. */ + @Override public void map(Text key, BytesWritable bytes, Context context) throws IOException, InterruptedException { @@ -412,6 +414,7 @@ public class ArcSegmentCreator extends Configured implements Tool { System.exit(res); } + @Override public int run(String[] args) throws Exception { String usage = "Usage: ArcSegmentCreator <arcFiles> <segmentsOutDir>"; diff --git a/src/java/org/apache/nutch/util/AbstractChecker.java b/src/java/org/apache/nutch/util/AbstractChecker.java index 616e3dd81..3116ede14 100644 --- a/src/java/org/apache/nutch/util/AbstractChecker.java +++ b/src/java/org/apache/nutch/util/AbstractChecker.java @@ -137,6 +137,7 @@ public abstract class AbstractChecker extends Configured implements Tool { LOG.info(client.toString()); } + @Override public void run() { // Setup streams BufferedReader in = null; diff --git a/src/java/org/apache/nutch/util/CommandRunner.java b/src/java/org/apache/nutch/util/CommandRunner.java index f9dcb6166..2515bf1c2 100644 --- a/src/java/org/apache/nutch/util/CommandRunner.java +++ b/src/java/org/apache/nutch/util/CommandRunner.java @@ -175,6 +175,7 @@ public class CommandRunner { _closeInput = closeInput; } + @Override public void run() { try { byte[] buf = new byte[BUF]; diff --git a/src/java/org/apache/nutch/util/EncodingDetector.java b/src/java/org/apache/nutch/util/EncodingDetector.java index 4e921f449..cffebc4e7 100644 --- a/src/java/org/apache/nutch/util/EncodingDetector.java +++ b/src/java/org/apache/nutch/util/EncodingDetector.java @@ -89,6 +89,7 @@ public class EncodingDetector { return value; } + @Override public String toString() { return value + " (" + source + ((confidence >= 0) ? ", " + confidence + "% confidence" : "") + ")"; diff --git a/src/java/org/apache/nutch/util/GenericWritableConfigurable.java b/src/java/org/apache/nutch/util/GenericWritableConfigurable.java index 086ca9bc0..db6b8cdb4 100644 --- a/src/java/org/apache/nutch/util/GenericWritableConfigurable.java +++ b/src/java/org/apache/nutch/util/GenericWritableConfigurable.java @@ -33,10 +33,12 @@ public abstract class GenericWritableConfigurable extends GenericWritable private Configuration conf; + @Override public Configuration getConf() { return conf; } + @Override public void setConf(Configuration conf) { this.conf = conf; } diff --git a/src/java/org/apache/nutch/util/PrefixStringMatcher.java b/src/java/org/apache/nutch/util/PrefixStringMatcher.java index 3f4863f35..fd88689b3 100644 --- a/src/java/org/apache/nutch/util/PrefixStringMatcher.java +++ b/src/java/org/apache/nutch/util/PrefixStringMatcher.java @@ -60,6 +60,7 @@ public class PrefixStringMatcher extends TrieStringMatcher { * Returns true if the given <code>String</code> is matched by a prefix in the * trie */ + @Override public boolean matches(String input) { TrieNode node = root; for (int i = 0; i < input.length(); i++) { @@ -76,6 +77,7 @@ public class PrefixStringMatcher extends TrieStringMatcher { * Returns the shortest prefix of <code>input</code> that is matched, * or <code>null</code> if no match exists. */ + @Override public String shortestMatch(String input) { TrieNode node = root; for (int i = 0; i < input.length(); i++) { @@ -92,6 +94,7 @@ public class PrefixStringMatcher extends TrieStringMatcher { * Returns the longest prefix of <code>input</code> that is matched, * or <code>null</code> if no match exists. */ + @Override public String longestMatch(String input) { TrieNode node = root; String result = null; diff --git a/src/java/org/apache/nutch/util/SuffixStringMatcher.java b/src/java/org/apache/nutch/util/SuffixStringMatcher.java index 1bf0774ba..42026201e 100644 --- a/src/java/org/apache/nutch/util/SuffixStringMatcher.java +++ b/src/java/org/apache/nutch/util/SuffixStringMatcher.java @@ -53,6 +53,7 @@ public class SuffixStringMatcher extends TrieStringMatcher { * Returns true if the given <code>String</code> is matched by a suffix in the * trie */ + @Override public boolean matches(String input) { TrieNode node = root; for (int i = input.length() - 1; i >= 0; i--) { @@ -69,6 +70,7 @@ public class SuffixStringMatcher extends TrieStringMatcher { * Returns the shortest suffix of <code>input</code> that is matched, * or <code>null</code> if no match exists. */ + @Override public String shortestMatch(String input) { TrieNode node = root; for (int i = input.length() - 1; i >= 0; i--) { @@ -85,6 +87,7 @@ public class SuffixStringMatcher extends TrieStringMatcher { * Returns the longest suffix of <code>input</code> that is matched, * or <code>null</code> if no match exists. */ + @Override public String longestMatch(String input) { TrieNode node = root; String result = null; diff --git a/src/java/org/apache/nutch/util/TrieStringMatcher.java b/src/java/org/apache/nutch/util/TrieStringMatcher.java index 20cd8487e..0c8602575 100644 --- a/src/java/org/apache/nutch/util/TrieStringMatcher.java +++ b/src/java/org/apache/nutch/util/TrieStringMatcher.java @@ -129,6 +129,7 @@ public abstract class TrieStringMatcher { return null; } + @Override public int compareTo(TrieNode other) { if (this.nodeChar < other.nodeChar) return -1; diff --git a/src/plugin/indexer-cloudsearch/src/java/org/apache/nutch/indexwriter/cloudsearch/CloudSearchIndexWriter.java b/src/plugin/indexer-cloudsearch/src/java/org/apache/nutch/indexwriter/cloudsearch/CloudSearchIndexWriter.java index 1c025e083..389157265 100644 --- a/src/plugin/indexer-cloudsearch/src/java/org/apache/nutch/indexwriter/cloudsearch/CloudSearchIndexWriter.java +++ b/src/plugin/indexer-cloudsearch/src/java/org/apache/nutch/indexwriter/cloudsearch/CloudSearchIndexWriter.java @@ -335,6 +335,7 @@ public class CloudSearchIndexWriter implements IndexWriter { } } + @Override public Configuration getConf() { return this.conf; } diff --git a/src/plugin/indexer-csv/src/java/org/apache/nutch/indexwriter/csv/CSVIndexWriter.java b/src/plugin/indexer-csv/src/java/org/apache/nutch/indexwriter/csv/CSVIndexWriter.java index 58e8993da..51cd41d7a 100644 --- a/src/plugin/indexer-csv/src/java/org/apache/nutch/indexwriter/csv/CSVIndexWriter.java +++ b/src/plugin/indexer-csv/src/java/org/apache/nutch/indexwriter/csv/CSVIndexWriter.java @@ -90,6 +90,7 @@ public class CSVIndexWriter implements IndexWriter { bytes = sepStr.getBytes(encoding); } + @Override public String toString() { StringBuilder sb = new StringBuilder(); for (char c : chars) { diff --git a/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpRobotRulesParser.java b/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpRobotRulesParser.java index ad2521b4b..db09a0c88 100644 --- a/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpRobotRulesParser.java +++ b/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpRobotRulesParser.java @@ -52,6 +52,7 @@ public class HttpRobotRulesParser extends RobotRulesParser { setConf(conf); } + @Override public void setConf(Configuration conf) { super.setConf(conf); allowForbidden = conf.getBoolean("http.robots.403.allow", true); diff --git a/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMContentUtils.java b/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMContentUtils.java index 76685675b..727698639 100644 --- a/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMContentUtils.java +++ b/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMContentUtils.java @@ -63,6 +63,7 @@ public class DOMContentUtils { this.childLen = childLen; } + @Override public String toString() { return "LP[el=" + elName + ",attr=" + attrName + ",len=" + childLen + "]"; } diff --git a/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/DOMContentUtils.java b/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/DOMContentUtils.java index ebe1919fa..81ef6c523 100644 --- a/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/DOMContentUtils.java +++ b/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/DOMContentUtils.java @@ -64,6 +64,7 @@ public class DOMContentUtils { this.childLen = childLen; } + @Override public String toString() { return "LP[el=" + elName + ",attr=" + attrName + ",len=" + childLen + "]"; } diff --git a/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/TestEmbeddedDocuments.java b/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/TestEmbeddedDocuments.java index 79ed28609..8d5b29030 100644 --- a/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/TestEmbeddedDocuments.java +++ b/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/TestEmbeddedDocuments.java @@ -33,6 +33,7 @@ public class TestEmbeddedDocuments extends TikaParserTest { private String expectedText = "When in the Course of human events"; + @Override @Before public void setUp() { super.setUp(); diff --git a/src/plugin/protocol-foo/src/java/org/apache/nutch/protocol/foo/Handler.java b/src/plugin/protocol-foo/src/java/org/apache/nutch/protocol/foo/Handler.java index 27f18377b..9664118d5 100644 --- a/src/plugin/protocol-foo/src/java/org/apache/nutch/protocol/foo/Handler.java +++ b/src/plugin/protocol-foo/src/java/org/apache/nutch/protocol/foo/Handler.java @@ -22,6 +22,7 @@ import java.net.URLStreamHandler; public class Handler extends URLStreamHandler { + @Override protected URLConnection openConnection(URL u) { throw new UnsupportedOperationException("not yet implemented"); } diff --git a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java index e23ad993f..168dae526 100644 --- a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java +++ b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java @@ -228,6 +228,7 @@ public class Client extends FTP { * @exception IOException * If an error occurs while disconnecting. ***/ + @Override public void disconnect() throws IOException { __initDefaults(); super.disconnect(); diff --git a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/PrintCommandListener.java b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/PrintCommandListener.java index d41c35a6f..f27536983 100644 --- a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/PrintCommandListener.java +++ b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/PrintCommandListener.java @@ -37,6 +37,7 @@ public class PrintCommandListener implements ProtocolCommandListener { __logger = logger; } + @Override public void protocolCommandSent(ProtocolCommandEvent event) { try { __logIt(event); @@ -47,6 +48,7 @@ public class PrintCommandListener implements ProtocolCommandListener { } } + @Override public void protocolReplyReceived(ProtocolCommandEvent event) { try { __logIt(event); diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/DummyX509TrustManager.java b/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/DummyX509TrustManager.java index e3521eaf0..fbd670a9a 100644 --- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/DummyX509TrustManager.java +++ b/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/DummyX509TrustManager.java @@ -78,16 +78,19 @@ public class DummyX509TrustManager implements X509TrustManager { * @see javax.net.ssl.X509TrustManager#getAcceptedIssuers() * @return a {@link java.security.cert.X509Certificate} array */ + @Override public X509Certificate[] getAcceptedIssuers() { return this.standardTrustManager.getAcceptedIssuers(); } + @Override public void checkClientTrusted(X509Certificate[] arg0, String arg1) throws CertificateException { // do nothing } + @Override public void checkServerTrusted(X509Certificate[] arg0, String arg1) throws CertificateException { // do nothing diff --git a/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/DummyX509TrustManager.java b/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/DummyX509TrustManager.java index 589600704..28a64a8ab 100644 --- a/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/DummyX509TrustManager.java +++ b/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/DummyX509TrustManager.java @@ -85,16 +85,19 @@ public class DummyX509TrustManager implements X509TrustManager { * @see javax.net.ssl.X509TrustManager#getAcceptedIssuers() * @return a {@link java.security.cert.X509Certificate} array */ + @Override public X509Certificate[] getAcceptedIssuers() { return this.standardTrustManager.getAcceptedIssuers(); } + @Override public void checkClientTrusted(X509Certificate[] arg0, String arg1) throws CertificateException { // do nothing } + @Override public void checkServerTrusted(X509Certificate[] arg0, String arg1) throws CertificateException { // do nothing diff --git a/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java b/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java index 157c9eebd..260a7c19c 100644 --- a/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java +++ b/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java @@ -343,22 +343,27 @@ public class HttpResponse implements Response { * ------------------------- */ + @Override public URL getUrl() { return url; } + @Override public int getCode() { return code; } + @Override public String getHeader(String name) { return headers.get(name); } + @Override public Metadata getHeaders() { return headers; } + @Override public byte[] getContent() { return content; } diff --git a/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java b/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java index f867a566c..b6e3fa9c3 100644 --- a/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java +++ b/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java @@ -81,6 +81,7 @@ public class DummySSLProtocolSocketFactory implements /** * @see org.apache.commons.httpclient.protocol.SecureProtocolSocketFactory#createSocket(String,int,InetAddress,int) */ + @Override public Socket createSocket(String host, int port, InetAddress clientHost, int clientPort) throws IOException, UnknownHostException { @@ -117,6 +118,7 @@ public class DummySSLProtocolSocketFactory implements * @throws UnknownHostException * if the IP address of the host cannot be determined */ + @Override public Socket createSocket(final String host, final int port, final InetAddress localAddress, final int localPort, final HttpConnectionParams params) throws IOException, @@ -137,6 +139,7 @@ public class DummySSLProtocolSocketFactory implements /** * @see org.apache.commons.httpclient.protocol.SecureProtocolSocketFactory#createSocket(String,int) */ + @Override public Socket createSocket(String host, int port) throws IOException, UnknownHostException { return getSSLContext().getSocketFactory().createSocket(host, port); @@ -145,17 +148,20 @@ public class DummySSLProtocolSocketFactory implements /** * @see org.apache.commons.httpclient.protocol.SecureProtocolSocketFactory#createSocket(Socket,String,int,boolean) */ + @Override public Socket createSocket(Socket socket, String host, int port, boolean autoClose) throws IOException, UnknownHostException { return getSSLContext().getSocketFactory().createSocket(socket, host, port, autoClose); } + @Override public boolean equals(Object obj) { return ((obj != null) && obj.getClass().equals( DummySSLProtocolSocketFactory.class)); } + @Override public int hashCode() { return DummySSLProtocolSocketFactory.class.hashCode(); } diff --git a/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummyX509TrustManager.java b/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummyX509TrustManager.java index 1bb7cf130..859dfff52 100644 --- a/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummyX509TrustManager.java +++ b/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummyX509TrustManager.java @@ -78,16 +78,19 @@ public class DummyX509TrustManager implements X509TrustManager { * @see javax.net.ssl.X509TrustManager#getAcceptedIssuers() * @return a {@link java.security.cert.X509Certificate} array */ + @Override public X509Certificate[] getAcceptedIssuers() { return this.standardTrustManager.getAcceptedIssuers(); } + @Override public void checkClientTrusted(X509Certificate[] arg0, String arg1) throws CertificateException { // do nothing } + @Override public void checkServerTrusted(X509Certificate[] arg0, String arg1) throws CertificateException { // do nothing diff --git a/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java b/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java index f33be6ddd..6113ef732 100644 --- a/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java +++ b/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java @@ -56,10 +56,12 @@ public class HttpAuthenticationFactory implements Configurable { setConf(conf); } + @Override public void setConf(Configuration conf) { this.conf = conf; } + @Override public Configuration getConf() { return conf; } diff --git a/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java b/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java index 0603e3da5..90c03e89b 100644 --- a/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java +++ b/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java @@ -110,6 +110,7 @@ public class HttpBasicAuthentication implements HttpAuthentication, * ---------------------------------- */ + @Override public void setConf(Configuration conf) { this.conf = conf; // if (conf.getBoolean("http.auth.verbose", false)) { @@ -119,6 +120,7 @@ public class HttpBasicAuthentication implements HttpAuthentication, // } } + @Override public Configuration getConf() { return this.conf; } @@ -135,6 +137,7 @@ public class HttpBasicAuthentication implements HttpAuthentication, * <code>Authorization: Basic <Base64 encoded userid:password></code> * */ + @Override public List<String> getCredentials() { return credentials; } @@ -146,6 +149,7 @@ public class HttpBasicAuthentication implements HttpAuthentication, * * @return The realm */ + @Override public String getRealm() { return realm; } diff --git a/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java b/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java index 010f5ca99..87ee0bb8a 100644 --- a/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java +++ b/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java @@ -209,22 +209,27 @@ public class HttpResponse implements Response { * ------------------------- */ + @Override public URL getUrl() { return url; } + @Override public int getCode() { return code; } + @Override public String getHeader(String name) { return headers.get(name); } + @Override public Metadata getHeaders() { return headers; } + @Override public byte[] getContent() { return content; } diff --git a/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/DummyX509TrustManager.java b/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/DummyX509TrustManager.java index 9e3afdc05..8f9d69be9 100644 --- a/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/DummyX509TrustManager.java +++ b/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/DummyX509TrustManager.java @@ -78,16 +78,19 @@ public class DummyX509TrustManager implements X509TrustManager { * @see javax.net.ssl.X509TrustManager#getAcceptedIssuers() * @return a {@link java.security.cert.X509Certificate} array */ + @Override public X509Certificate[] getAcceptedIssuers() { return this.standardTrustManager.getAcceptedIssuers(); } + @Override public void checkClientTrusted(X509Certificate[] arg0, String arg1) throws CertificateException { // do nothing } + @Override public void checkServerTrusted(X509Certificate[] arg0, String arg1) throws CertificateException { // do nothing diff --git a/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/HttpResponse.java b/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/HttpResponse.java index a5793c6e3..16fe29180 100644 --- a/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/HttpResponse.java +++ b/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/HttpResponse.java @@ -333,22 +333,27 @@ public class HttpResponse implements Response { * ------------------------- */ + @Override public URL getUrl() { return url; } + @Override public int getCode() { return code; } + @Override public String getHeader(String name) { return headers.get(name); } + @Override public Metadata getHeaders() { return headers; } + @Override public byte[] getContent() { return content; } diff --git a/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/handlers/DefalultMultiInteractionHandler.java b/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/handlers/DefalultMultiInteractionHandler.java index 126d9e9ec..831bab63e 100644 --- a/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/handlers/DefalultMultiInteractionHandler.java +++ b/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/handlers/DefalultMultiInteractionHandler.java @@ -34,6 +34,7 @@ public class DefalultMultiInteractionHandler implements private static final Logger LOG = LoggerFactory .getLogger(MethodHandles.lookup().lookupClass()); + @Override public String processDriver(WebDriver driver) { // loop and get multiple pages in this string String accumulatedData = ""; @@ -49,6 +50,7 @@ public class DefalultMultiInteractionHandler implements return accumulatedData; } + @Override public boolean shouldProcessURL(String URL) { return true; } diff --git a/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/handlers/DefaultClickAllAjaxLinksHandler.java b/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/handlers/DefaultClickAllAjaxLinksHandler.java index 4f05acd3d..178c804b1 100644 --- a/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/handlers/DefaultClickAllAjaxLinksHandler.java +++ b/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/handlers/DefaultClickAllAjaxLinksHandler.java @@ -40,6 +40,7 @@ public class DefaultClickAllAjaxLinksHandler implements InteractiveSeleniumHandl private static final Logger LOG = LoggerFactory .getLogger(MethodHandles.lookup().lookupClass()); + @Override public String processDriver(WebDriver driver) { String accumulatedData = ""; @@ -84,6 +85,7 @@ public class DefaultClickAllAjaxLinksHandler implements InteractiveSeleniumHandl return accumulatedData; } + @Override public boolean shouldProcessURL(String URL) { return true; } diff --git a/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/OkHttpResponse.java b/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/OkHttpResponse.java index 5ec6a9b8a..67bc45b03 100644 --- a/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/OkHttpResponse.java +++ b/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/OkHttpResponse.java @@ -221,22 +221,27 @@ public class OkHttpResponse implements Response { return arr; } + @Override public URL getUrl() { return this.url; } + @Override public int getCode() { return this.code; } + @Override public String getHeader(String name) { return this.headers.get(name); } + @Override public Metadata getHeaders() { return this.headers; } + @Override public byte[] getContent() { return this.content; } diff --git a/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/DummyX509TrustManager.java b/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/DummyX509TrustManager.java index 03ac20472..48bfe22fc 100644 --- a/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/DummyX509TrustManager.java +++ b/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/DummyX509TrustManager.java @@ -78,16 +78,19 @@ public class DummyX509TrustManager implements X509TrustManager { * @see javax.net.ssl.X509TrustManager#getAcceptedIssuers() * @return a {@link java.security.cert.X509Certificate} array */ + @Override public X509Certificate[] getAcceptedIssuers() { return this.standardTrustManager.getAcceptedIssuers(); } + @Override public void checkClientTrusted(X509Certificate[] arg0, String arg1) throws CertificateException { // do nothing } + @Override public void checkServerTrusted(X509Certificate[] arg0, String arg1) throws CertificateException { // do nothing diff --git a/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/HttpResponse.java b/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/HttpResponse.java index b394d02df..bb3bf6357 100644 --- a/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/HttpResponse.java +++ b/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/HttpResponse.java @@ -331,22 +331,27 @@ public class HttpResponse implements Response { * ------------------------- */ + @Override public URL getUrl() { return url; } + @Override public int getCode() { return code; } + @Override public String getHeader(String name) { return headers.get(name); } + @Override public Metadata getHeaders() { return headers; } + @Override public byte[] getContent() { return content; } diff --git a/src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/AutomatonURLFilter.java b/src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/AutomatonURLFilter.java index b33a660b9..89c1e2dfe 100644 --- a/src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/AutomatonURLFilter.java +++ b/src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/AutomatonURLFilter.java @@ -63,6 +63,7 @@ public class AutomatonURLFilter extends RegexURLFilterBase { * Rules specified as a config property will override rules specified as a * config file. */ + @Override protected Reader getRulesReader(Configuration conf) throws IOException { String stringRules = conf.get(URLFILTER_AUTOMATON_RULES); if (stringRules != null) { @@ -73,10 +74,12 @@ public class AutomatonURLFilter extends RegexURLFilterBase { } // Inherited Javadoc + @Override protected RegexRule createRule(boolean sign, String regex) { return new Rule(sign, regex); } + @Override protected RegexRule createRule(boolean sign, String regex, String hostOrDomain) { return new Rule(sign, regex, hostOrDomain); } @@ -104,6 +107,7 @@ public class AutomatonURLFilter extends RegexURLFilterBase { automaton = new RunAutomaton(new RegExp(regex, RegExp.ALL).toAutomaton()); } + @Override protected boolean match(String url) { return automaton.run(url); } diff --git a/src/plugin/urlfilter-automaton/src/test/org/apache/nutch/urlfilter/automaton/TestAutomatonURLFilter.java b/src/plugin/urlfilter-automaton/src/test/org/apache/nutch/urlfilter/automaton/TestAutomatonURLFilter.java index 22ffb09cc..bf2d6b236 100644 --- a/src/plugin/urlfilter-automaton/src/test/org/apache/nutch/urlfilter/automaton/TestAutomatonURLFilter.java +++ b/src/plugin/urlfilter-automaton/src/test/org/apache/nutch/urlfilter/automaton/TestAutomatonURLFilter.java @@ -33,6 +33,7 @@ import org.junit.Test; */ public class TestAutomatonURLFilter extends RegexURLFilterBaseTest { + @Override protected URLFilter getURLFilter(Reader rules) { try { return new AutomatonURLFilter(rules); diff --git a/src/plugin/urlfilter-fast/src/java/org/apache/nutch/urlfilter/fast/FastURLFilter.java b/src/plugin/urlfilter-fast/src/java/org/apache/nutch/urlfilter/fast/FastURLFilter.java index d53a2fd86..ffcd0138a 100644 --- a/src/plugin/urlfilter-fast/src/java/org/apache/nutch/urlfilter/fast/FastURLFilter.java +++ b/src/plugin/urlfilter-fast/src/java/org/apache/nutch/urlfilter/fast/FastURLFilter.java @@ -268,6 +268,7 @@ public class FastURLFilter implements URLFilter { return pattern.matcher(url.toString()).find(); } + @Override public String toString() { return pattern.toString(); } @@ -278,6 +279,7 @@ public class FastURLFilter implements URLFilter { super(regex); } + @Override public boolean match(URL url) { String haystack = url.getPath(); return pattern.matcher(haystack).find(); @@ -297,6 +299,7 @@ public class FastURLFilter implements URLFilter { return instance; } + @Override public boolean match(URL url) { return true; } @@ -307,6 +310,7 @@ public class FastURLFilter implements URLFilter { super(regex); } + @Override public boolean match(URL url) { String haystack = url.getFile(); return pattern.matcher(haystack).find(); diff --git a/src/plugin/urlfilter-fast/src/test/org/apache/nutch/urlfilter/fast/TestFastURLFilter.java b/src/plugin/urlfilter-fast/src/test/org/apache/nutch/urlfilter/fast/TestFastURLFilter.java index 96092288e..8e01d8d3c 100644 --- a/src/plugin/urlfilter-fast/src/test/org/apache/nutch/urlfilter/fast/TestFastURLFilter.java +++ b/src/plugin/urlfilter-fast/src/test/org/apache/nutch/urlfilter/fast/TestFastURLFilter.java @@ -28,6 +28,7 @@ import org.junit.Test; public class TestFastURLFilter extends RegexURLFilterBaseTest { + @Override protected URLFilter getURLFilter(Reader rules) { try { return new FastURLFilter(rules); diff --git a/src/plugin/urlfilter-ignoreexempt/src/java/org/apache/nutch/urlfilter/ignoreexempt/ExemptionUrlFilter.java b/src/plugin/urlfilter-ignoreexempt/src/java/org/apache/nutch/urlfilter/ignoreexempt/ExemptionUrlFilter.java index 675d85756..96ca9b4ac 100644 --- a/src/plugin/urlfilter-ignoreexempt/src/java/org/apache/nutch/urlfilter/ignoreexempt/ExemptionUrlFilter.java +++ b/src/plugin/urlfilter-ignoreexempt/src/java/org/apache/nutch/urlfilter/ignoreexempt/ExemptionUrlFilter.java @@ -69,6 +69,7 @@ public class ExemptionUrlFilter extends RegexURLFilter /** * Gets reader for regex rules */ + @Override protected Reader getRulesReader(Configuration conf) throws IOException { String fileRules = conf.get(DB_IGNORE_EXTERNAL_EXEMPTIONS_FILE); diff --git a/src/plugin/urlfilter-prefix/src/test/org/apache/nutch/urlfilter/prefix/TestPrefixURLFilter.java b/src/plugin/urlfilter-prefix/src/test/org/apache/nutch/urlfilter/prefix/TestPrefixURLFilter.java index b7a7ce439..0079fa7fd 100644 --- a/src/plugin/urlfilter-prefix/src/test/org/apache/nutch/urlfilter/prefix/TestPrefixURLFilter.java +++ b/src/plugin/urlfilter-prefix/src/test/org/apache/nutch/urlfilter/prefix/TestPrefixURLFilter.java @@ -67,6 +67,7 @@ public class TestPrefixURLFilter extends TestCase { TestRunner.run(suite()); } + @Override public void setUp() throws IOException { filter = new PrefixURLFilter(prefixes); } diff --git a/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/RegexURLFilter.java b/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/RegexURLFilter.java index 9245a808a..19d14c33c 100644 --- a/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/RegexURLFilter.java +++ b/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/RegexURLFilter.java @@ -58,6 +58,7 @@ public class RegexURLFilter extends RegexURLFilterBase { * Rules specified as a config property will override rules specified as a * config file. */ + @Override protected Reader getRulesReader(Configuration conf) throws IOException { String stringRules = conf.get(URLFILTER_REGEX_RULES); if (stringRules != null) { @@ -68,10 +69,12 @@ public class RegexURLFilter extends RegexURLFilterBase { } // Inherited Javadoc + @Override protected RegexRule createRule(boolean sign, String regex) { return new Rule(sign, regex); } + @Override protected RegexRule createRule(boolean sign, String regex, String hostOrDomain) { return new Rule(sign, regex, hostOrDomain); } @@ -102,6 +105,7 @@ public class RegexURLFilter extends RegexURLFilterBase { pattern = Pattern.compile(regex); } + @Override protected boolean match(String url) { return pattern.matcher(url).find(); } diff --git a/src/plugin/urlfilter-regex/src/test/org/apache/nutch/urlfilter/regex/TestRegexURLFilter.java b/src/plugin/urlfilter-regex/src/test/org/apache/nutch/urlfilter/regex/TestRegexURLFilter.java index d86c71268..bd0503eb4 100644 --- a/src/plugin/urlfilter-regex/src/test/org/apache/nutch/urlfilter/regex/TestRegexURLFilter.java +++ b/src/plugin/urlfilter-regex/src/test/org/apache/nutch/urlfilter/regex/TestRegexURLFilter.java @@ -33,6 +33,7 @@ import org.junit.Test; */ public class TestRegexURLFilter extends RegexURLFilterBaseTest { + @Override protected URLFilter getURLFilter(Reader rules) { try { return new RegexURLFilter(rules); diff --git a/src/plugin/urlnormalizer-regex/src/java/org/apache/nutch/net/urlnormalizer/regex/RegexURLNormalizer.java b/src/plugin/urlnormalizer-regex/src/java/org/apache/nutch/net/urlnormalizer/regex/RegexURLNormalizer.java index c86d55ac2..b1d087dd7 100644 --- a/src/plugin/urlnormalizer-regex/src/java/org/apache/nutch/net/urlnormalizer/regex/RegexURLNormalizer.java +++ b/src/plugin/urlnormalizer-regex/src/java/org/apache/nutch/net/urlnormalizer/regex/RegexURLNormalizer.java @@ -80,6 +80,7 @@ public class RegexURLNormalizer extends Configured implements URLNormalizer { } private ThreadLocal<HashMap<String, List<Rule>>> scopedRulesThreadLocal = new ThreadLocal<HashMap<String, List<Rule>>>() { + @Override protected java.util.HashMap<String, java.util.List<Rule>> initialValue() { return new HashMap<String, List<Rule>>(); }; diff --git a/src/plugin/urlnormalizer-regex/src/test/org/apache/nutch/net/urlnormalizer/regex/TestRegexURLNormalizer.java b/src/plugin/urlnormalizer-regex/src/test/org/apache/nutch/net/urlnormalizer/regex/TestRegexURLNormalizer.java index 9c1bf99f8..1eee7183b 100644 --- a/src/plugin/urlnormalizer-regex/src/test/org/apache/nutch/net/urlnormalizer/regex/TestRegexURLNormalizer.java +++ b/src/plugin/urlnormalizer-regex/src/test/org/apache/nutch/net/urlnormalizer/regex/TestRegexURLNormalizer.java @@ -54,6 +54,7 @@ public class TestRegexURLNormalizer { conf = NutchConfiguration.create(); normalizer.setConf(conf); File[] configs = new File(sampleDir).listFiles(new FileFilter() { + @Override public boolean accept(File f) { if (f.getName().endsWith(".xml") && f.getName().startsWith("regex-normalize-")) diff --git a/src/test/org/apache/nutch/crawl/CrawlDBTestUtil.java b/src/test/org/apache/nutch/crawl/CrawlDBTestUtil.java index 87da8faf2..b125aeb3f 100644 --- a/src/test/org/apache/nutch/crawl/CrawlDBTestUtil.java +++ b/src/test/org/apache/nutch/crawl/CrawlDBTestUtil.java @@ -105,6 +105,7 @@ public class CrawlDBTestUtil { } /** collected values as List */ + @Override public List<CrawlDatum> getValues() { return values; } @@ -123,17 +124,21 @@ public class CrawlDBTestUtil { private Counters dummyCounters = new Counters(); + @Override public void progress() { } + @Override public Counter getCounter(Enum<?> arg0) { return dummyCounters.getGroup("dummy").getCounterForName("dummy"); } + @Override public Counter getCounter(String arg0, String arg1) { return dummyCounters.getGroup("dummy").getCounterForName("dummy"); } + @Override public void setStatus(String arg0) throws UnsupportedOperationException { throw new UnsupportedOperationException("Dummy context with no status"); } @@ -143,14 +148,17 @@ public class CrawlDBTestUtil { throw new UnsupportedOperationException("Dummy context with no status"); } + @Override public float getProgress() { return 1f; } + @Override public OutputCommitter getOutputCommitter() { throw new UnsupportedOperationException("Dummy context without committer"); } + @Override public boolean nextKey(){ return false; } diff --git a/src/test/org/apache/nutch/crawl/CrawlDbUpdateUtil.java b/src/test/org/apache/nutch/crawl/CrawlDbUpdateUtil.java index 1417adf37..0fd094ece 100644 --- a/src/test/org/apache/nutch/crawl/CrawlDbUpdateUtil.java +++ b/src/test/org/apache/nutch/crawl/CrawlDbUpdateUtil.java @@ -80,6 +80,7 @@ public class CrawlDbUpdateUtil <T extends Reducer<Text, CrawlDatum, Text, CrawlD } /** collected values as List */ + @Override public List<CrawlDatum> getValues() { return values; } @@ -98,17 +99,21 @@ public class CrawlDbUpdateUtil <T extends Reducer<Text, CrawlDatum, Text, CrawlD private Counters dummyCounters = new Counters(); + @Override public void progress() { } + @Override public Counter getCounter(Enum<?> arg0) { return dummyCounters.getGroup("dummy").getCounterForName("dummy"); } + @Override public Counter getCounter(String arg0, String arg1) { return dummyCounters.getGroup("dummy").getCounterForName("dummy"); } + @Override public void setStatus(String arg0) throws UnsupportedOperationException { throw new UnsupportedOperationException("Dummy context with no status"); } @@ -118,14 +123,17 @@ public class CrawlDbUpdateUtil <T extends Reducer<Text, CrawlDatum, Text, CrawlD throw new UnsupportedOperationException("Dummy context with no status"); } + @Override public float getProgress() { return 1f; } + @Override public OutputCommitter getOutputCommitter() { throw new UnsupportedOperationException("Dummy context without committer"); } + @Override public boolean nextKey(){ return false; } diff --git a/src/test/org/apache/nutch/crawl/TestAdaptiveFetchSchedule.java b/src/test/org/apache/nutch/crawl/TestAdaptiveFetchSchedule.java index 3fa798d68..f83c8d9fb 100644 --- a/src/test/org/apache/nutch/crawl/TestAdaptiveFetchSchedule.java +++ b/src/test/org/apache/nutch/crawl/TestAdaptiveFetchSchedule.java @@ -36,6 +36,7 @@ public class TestAdaptiveFetchSchedule extends TestCase { private long curTime, lastModified; private int changed, interval, calculateInterval; + @Override @Before public void setUp() throws Exception { super.setUp(); diff --git a/src/test/org/apache/nutch/crawl/TestGenerator.java b/src/test/org/apache/nutch/crawl/TestGenerator.java index ad05f2136..56b24c1a2 100644 --- a/src/test/org/apache/nutch/crawl/TestGenerator.java +++ b/src/test/org/apache/nutch/crawl/TestGenerator.java @@ -119,6 +119,7 @@ public class TestGenerator { */ public class ScoreComparator implements Comparator<URLCrawlDatum> { + @Override public int compare(URLCrawlDatum tuple1, URLCrawlDatum tuple2) { if (tuple2.datum.getScore() - tuple1.datum.getScore() < 0) { return -1; diff --git a/src/test/org/apache/nutch/plugin/HelloWorldExtension.java b/src/test/org/apache/nutch/plugin/HelloWorldExtension.java index 451815d9e..5524ec343 100644 --- a/src/test/org/apache/nutch/plugin/HelloWorldExtension.java +++ b/src/test/org/apache/nutch/plugin/HelloWorldExtension.java @@ -29,6 +29,7 @@ public class HelloWorldExtension implements ITestExtension { * @see * org.apache.nutch.plugin.ITestExtension#testGetExtension(java.lang.String) */ + @Override public String testGetExtension(String hello) { return hello + " World"; } diff --git a/src/test/org/apache/nutch/plugin/SimpleTestPlugin.java b/src/test/org/apache/nutch/plugin/SimpleTestPlugin.java index 5eb3450c5..52d08cdaa 100644 --- a/src/test/org/apache/nutch/plugin/SimpleTestPlugin.java +++ b/src/test/org/apache/nutch/plugin/SimpleTestPlugin.java @@ -37,6 +37,7 @@ public class SimpleTestPlugin extends Plugin { /* * @see org.apache.nutch.plugin.Plugin#startUp() */ + @Override public void startUp() throws PluginRuntimeException { System.err.println("start up Plugin: " + getDescriptor().getPluginId()); @@ -47,6 +48,7 @@ public class SimpleTestPlugin extends Plugin { * * @see org.apache.nutch.plugin.Plugin#shutDown() */ + @Override public void shutDown() throws PluginRuntimeException { System.err.println("shutdown Plugin: " + getDescriptor().getPluginId());