Merge branch 'kamaci/NUTCH-2351' into 2.x, closes #171
Project: http://git-wip-us.apache.org/repos/asf/nutch/repo
Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/54300a89
Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/54300a89
Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/54300a89
Branch: refs/heads/2.x
Commit: 54300a89e7a329effa11a8e3218ccff984ed8669
Parents: 7d3e45f 1a84334
Author: Sebastian Nagel <[email protected]>
Authored: Thu Jan 19 23:01:11 2017 +0100
Committer: Sebastian Nagel <[email protected]>
Committed: Thu Jan 19 23:01:11 2017 +0100
----------------------------------------------------------------------
src/java/org/apache/nutch/api/NutchServer.java | 4 ++-
.../org/apache/nutch/api/impl/JobWorker.java | 4 ++-
.../apache/nutch/api/impl/db/DbIterator.java | 4 ++-
.../nutch/api/resources/AdminResource.java | 3 ++-
.../nutch/api/resources/SeedResource.java | 6 +++--
.../nutch/api/security/SecurityUtils.java | 4 ++-
.../nutch/crawl/AbstractFetchSchedule.java | 3 ++-
.../org/apache/nutch/crawl/DbUpdateMapper.java | 3 ++-
.../org/apache/nutch/crawl/DbUpdateReducer.java | 2 +-
.../org/apache/nutch/crawl/DbUpdaterJob.java | 4 ++-
.../nutch/crawl/FetchScheduleFactory.java | 6 +++--
.../org/apache/nutch/crawl/GeneratorJob.java | 4 ++-
.../org/apache/nutch/crawl/InjectorJob.java | 6 +++--
.../apache/nutch/crawl/SignatureFactory.java | 3 ++-
.../org/apache/nutch/crawl/URLPartitioner.java | 3 ++-
.../org/apache/nutch/crawl/WebTableReader.java | 5 ++--
.../org/apache/nutch/fetcher/FetcherJob.java | 4 ++-
.../apache/nutch/fetcher/FetcherReducer.java | 2 +-
src/java/org/apache/nutch/host/HostDb.java | 8 +++---
.../org/apache/nutch/host/HostDbReader.java | 10 +++++---
.../org/apache/nutch/host/HostDbUpdateJob.java | 6 +++--
.../org/apache/nutch/host/HostInjectorJob.java | 5 ++--
.../org/apache/nutch/indexer/CleaningJob.java | 4 ++-
.../nutch/indexer/IndexCleaningFilters.java | 5 ++--
.../org/apache/nutch/indexer/IndexUtil.java | 10 +++++---
.../org/apache/nutch/indexer/IndexWriters.java | 4 ++-
.../apache/nutch/indexer/IndexingFilters.java | 5 ++--
.../nutch/indexer/IndexingFiltersChecker.java | 5 ++--
.../org/apache/nutch/indexer/IndexingJob.java | 4 ++-
.../indexer/solr/SolrDeleteDuplicates.java | 5 ++--
.../apache/nutch/indexer/solr/SolrUtils.java | 4 ++-
.../org/apache/nutch/net/URLNormalizers.java | 5 ++--
.../apache/nutch/parse/OutlinkExtractor.java | 3 ++-
.../apache/nutch/parse/ParsePluginsReader.java | 5 ++--
src/java/org/apache/nutch/parse/ParseUtil.java | 4 ++-
.../org/apache/nutch/parse/ParserChecker.java | 4 ++-
.../org/apache/nutch/parse/ParserFactory.java | 4 ++-
src/java/org/apache/nutch/parse/ParserJob.java | 4 ++-
.../apache/nutch/plugin/PluginDescriptor.java | 5 ++--
.../nutch/plugin/PluginManifestParser.java | 2 +-
.../apache/nutch/plugin/PluginRepository.java | 5 ++--
.../apache/nutch/protocol/ProtocolFactory.java | 5 ++--
.../apache/nutch/protocol/RobotRulesParser.java | 5 ++--
src/java/org/apache/nutch/tools/Benchmark.java | 4 ++-
src/java/org/apache/nutch/tools/DmozParser.java | 4 ++-
.../org/apache/nutch/tools/ResolveUrls.java | 4 ++-
.../apache/nutch/tools/arc/ArcRecordReader.java | 5 ++--
.../nutch/tools/proxy/LogDebugHandler.java | 3 ++-
.../apache/nutch/tools/proxy/TestbedProxy.java | 5 +++-
src/java/org/apache/nutch/util/Bytes.java | 4 ++-
.../org/apache/nutch/util/DeflateUtils.java | 4 ++-
src/java/org/apache/nutch/util/DomUtil.java | 4 ++-
.../org/apache/nutch/util/EncodingDetector.java | 5 ++--
src/java/org/apache/nutch/util/GZIPUtils.java | 4 ++-
src/java/org/apache/nutch/util/MimeUtil.java | 5 ++--
src/java/org/apache/nutch/util/ObjectCache.java | 4 ++-
.../nutch/util/domain/DomainStatistics.java | 3 ++-
.../nutch/util/domain/DomainSuffixes.java | 3 ++-
.../nutch/util/domain/DomainSuffixesReader.java | 3 ++-
.../nutch/webui/client/impl/CrawlingCycle.java | 6 +++--
.../client/impl/RemoteCommandExecutor.java | 6 +++--
.../webui/service/impl/CrawlServiceImpl.java | 6 +++--
.../webui/service/impl/NutchServiceImpl.java | 7 +++---
.../creativecommons/nutch/CCIndexingFilter.java | 5 ++--
.../creativecommons/nutch/CCParseFilter.java | 4 ++-
.../org/apache/nutch/parse/feed/FeedParser.java | 5 ++--
.../apache/nutch/parse/feed/TestFeedParser.java | 5 ++--
.../indexer/anchor/AnchorIndexingFilter.java | 5 ++--
.../indexer/basic/BasicIndexingFilter.java | 5 ++--
.../nutch/indexer/html/HtmlIndexingFilter.java | 5 ++--
.../nutch/indexer/more/MoreIndexingFilter.java | 5 ++--
.../indexwriter/elastic/ElasticIndexWriter.java | 4 ++-
.../elastic2/ElasticIndexWriter.java | 4 ++-
.../nutch/indexwriter/solr/SolrIndexWriter.java | 5 ++--
.../indexwriter/solr/SolrMappingReader.java | 4 ++-
.../nutch/indexwriter/solr/SolrUtils.java | 4 ++-
.../nutch/analysis/lang/HTMLLanguageParser.java | 5 ++--
.../nutch/protocol/http/api/HttpBase.java | 26 +++++++++++---------
.../protocol/http/api/HttpRobotRulesParser.java | 6 +++--
.../nutch/urlfilter/api/RegexURLFilterBase.java | 5 ++--
.../urlfilter/api/RegexURLFilterBaseTest.java | 5 ++--
.../nutch/microformats/reltag/RelTagParser.java | 4 ++-
.../org/apache/nutch/parse/ext/ExtParser.java | 4 ++-
.../org/apache/nutch/parse/html/HtmlParser.java | 5 ++--
.../apache/nutch/parse/html/TestHtmlParser.java | 5 ++--
.../apache/nutch/parse/js/JSParseFilter.java | 4 ++-
.../nutch/parse/metatags/MetaTagsParser.java | 9 ++++---
.../org/apache/nutch/parse/swf/SWFParser.java | 3 ++-
.../tika/BoilerpipeExtractorRepository.java | 6 +++--
.../org/apache/nutch/parse/tika/TikaParser.java | 4 ++-
.../nutch/parse/tika/DOMContentUtilsTest.java | 9 ++++---
.../org/apache/nutch/parse/zip/ZipParser.java | 4 ++-
.../nutch/parse/zip/ZipTextExtractor.java | 8 +++---
.../org/apache/nutch/protocol/file/File.java | 4 ++-
.../java/org/apache/nutch/protocol/ftp/Ftp.java | 4 ++-
.../nutch/protocol/ftp/FtpRobotRulesParser.java | 5 ++--
.../org/apache/nutch/protocol/http/Http.java | 4 ++-
.../DummySSLProtocolSocketFactory.java | 3 ++-
.../apache/nutch/protocol/httpclient/Http.java | 4 ++-
.../httpclient/HttpAuthenticationFactory.java | 5 ++--
.../httpclient/HttpBasicAuthentication.java | 5 ++--
.../org/apache/nutch/protocol/sftp/Sftp.java | 26 +++++++++++---------
.../nutch/scoring/opic/OPICScoringFilter.java | 5 ++--
.../nutch/collection/CollectionManager.java | 4 ++-
.../SubcollectionIndexingFilter.java | 5 ++--
.../nutch/indexer/tld/TLDIndexingFilter.java | 5 ++--
.../nutch/urlfilter/domain/DomainURLFilter.java | 3 ++-
.../urlfilter/domain/TestDomainURLFilter.java | 5 ++--
.../nutch/urlfilter/prefix/PrefixURLFilter.java | 3 ++-
.../nutch/urlfilter/suffix/SuffixURLFilter.java | 3 ++-
.../urlnormalizer/basic/BasicURLNormalizer.java | 5 ++--
.../urlnormalizer/regex/RegexURLNormalizer.java | 3 ++-
.../regex/TestRegexURLNormalizer.java | 3 ++-
.../org/apache/nutch/fetcher/TestFetcher.java | 4 ++-
114 files changed, 365 insertions(+), 200 deletions(-)
----------------------------------------------------------------------
