This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch NUTCH-2292 in repository https://gitbox.apache.org/repos/asf/nutch.git
commit 9a0ce9e3e8a7d1fc6093b31e40ff26e503dc7beb Merge: ecc60d7 62491d5 Author: Lewis John McGibbney <[email protected]> AuthorDate: Tue Mar 7 14:20:23 2017 -0800 Merge branch 'NUTCH-2292' of https://github.com/apache/nutch into NUTCH-2292 .gitignore | 6 +++- conf/nutch-default.xml | 8 +++++ default.properties | 4 +-- ivy/mvn.template | 4 +-- .../java/org/apache/nutch/crawl/CrawlDatum.java | 8 ++--- .../main/java/org/apache/nutch/crawl/CrawlDb.java | 21 +++++------ .../java/org/apache/nutch/crawl/CrawlDbMerger.java | 2 +- .../java/org/apache/nutch/crawl/CrawlDbReader.java | 14 ++++---- .../org/apache/nutch/crawl/CrawlDbReducer.java | 2 +- .../org/apache/nutch/crawl/DeduplicationJob.java | 2 +- .../java/org/apache/nutch/crawl/Generator.java | 6 ++-- .../main/java/org/apache/nutch/crawl/Injector.java | 2 +- .../main/java/org/apache/nutch/crawl/Inlinks.java | 8 ++--- .../main/java/org/apache/nutch/crawl/LinkDb.java | 21 +++++------ .../java/org/apache/nutch/crawl/LinkDbMerger.java | 2 +- .../java/org/apache/nutch/crawl/LinkDbReader.java | 2 +- .../nutch/crawl/MimeAdaptiveFetchSchedule.java | 2 +- .../apache/nutch/crawl/TextProfileSignature.java | 6 ++-- .../org/apache/nutch/fetcher/FetchItemQueues.java | 2 +- .../java/org/apache/nutch/fetcher/FetchNodeDb.java | 2 +- .../java/org/apache/nutch/fetcher/Fetcher.java | 17 ++++----- .../org/apache/nutch/fetcher/FetcherThread.java | 6 ++-- .../apache/nutch/fetcher/FetcherThreadEvent.java | 2 +- .../nutch/fetcher/FetcherThreadPublisher.java | 0 .../apache/nutch/hostdb/UpdateHostDbReducer.java | 14 ++++---- .../org/apache/nutch/indexer/IndexWriters.java | 2 +- .../nutch/indexer/IndexingFiltersChecker.java | 2 +- .../java/org/apache/nutch/indexer/IndexingJob.java | 21 +++++------ .../org/apache/nutch/indexer/NutchDocument.java | 2 +- .../java/org/apache/nutch/indexer/NutchField.java | 4 +-- .../java/org/apache/nutch/metadata/Metadata.java | 2 +- .../nutch/metadata/SpellCheckedMetadata.java | 2 +- .../java/org/apache/nutch/net/URLNormalizers.java | 8 ++--- .../org/apache/nutch/parse/OutlinkExtractor.java | 2 +- .../java/org/apache/nutch/parse/ParseData.java | 5 +-- .../org/apache/nutch/parse/ParseOutputFormat.java | 4 +-- .../org/apache/nutch/parse/ParsePluginList.java | 4 +-- .../org/apache/nutch/parse/ParsePluginsReader.java | 4 +-- .../java/org/apache/nutch/parse/ParseResult.java | 2 +- .../java/org/apache/nutch/parse/ParseSegment.java | 15 ++++---- .../java/org/apache/nutch/parse/ParseText.java | 5 +-- .../java/org/apache/nutch/parse/ParserChecker.java | 2 +- .../java/org/apache/nutch/parse/ParserFactory.java | 4 +-- .../java/org/apache/nutch/plugin/Extension.java | 2 +- .../org/apache/nutch/plugin/ExtensionPoint.java | 2 +- .../org/apache/nutch/plugin/PluginDescriptor.java | 16 ++++----- .../apache/nutch/plugin/PluginManifestParser.java | 2 +- .../org/apache/nutch/plugin/PluginRepository.java | 30 ++++++++-------- .../java/org/apache/nutch/protocol/Content.java | 5 +-- .../org/apache/nutch/protocol/ProtocolStatus.java | 2 +- .../apache/nutch/protocol/RobotRulesParser.java | 6 ++-- .../org/apache/nutch/publisher/NutchPublisher.java | 0 .../apache/nutch/publisher/NutchPublishers.java | 0 .../apache/nutch/scoring/webgraph/LinkDumper.java | 6 ++-- .../apache/nutch/scoring/webgraph/LinkRank.java | 6 ++-- .../apache/nutch/scoring/webgraph/NodeReader.java | 2 +- .../apache/nutch/scoring/webgraph/WebGraph.java | 8 ++--- .../nutch/segment/ContentAsTextInputFormat.java | 2 +- .../org/apache/nutch/segment/SegmentMerger.java | 10 +++--- .../org/apache/nutch/segment/SegmentReader.java | 17 ++++----- .../java/org/apache/nutch/service/NutchServer.java | 4 +-- .../java/org/apache/nutch/service/SeedManager.java | 0 .../org/apache/nutch/service/impl/LinkReader.java | 8 ++--- .../org/apache/nutch/service/impl/NodeReader.java | 8 ++--- .../apache/nutch/service/impl/SeedManagerImpl.java | 0 .../apache/nutch/service/impl/SequenceReader.java | 12 +++---- .../nutch/service/model/request/DbQuery.java | 2 +- .../service/model/response/FetchNodeDbInfo.java | 2 +- .../apache/nutch/service/resources/DbResource.java | 2 +- .../java/org/apache/nutch/tools/Benchmark.java | 8 ++--- .../apache/nutch/tools/CommonCrawlDataDumper.java | 4 +-- .../nutch/tools/CommonCrawlFormatJettinson.java | 4 +-- .../java/org/apache/nutch/tools/DmozParser.java | 22 ++++-------- .../java/org/apache/nutch/tools/FileDumper.java | 35 ++++++------------ .../java/org/apache/nutch/tools/FreeGenerator.java | 2 +- .../org/apache/nutch/tools/warc/WARCExporter.java | 2 +- .../org/apache/nutch/util/EncodingDetector.java | 10 +++--- .../java/org/apache/nutch/util/HadoopFSUtil.java | 19 ++++------ .../main/java/org/apache/nutch/util/MimeUtil.java | 5 +-- .../java/org/apache/nutch/util/NodeWalker.java | 2 +- .../main/java/org/apache/nutch/util/NutchTool.java | 2 +- .../java/org/apache/nutch/util/ObjectCache.java | 4 +-- .../org/apache/nutch/util/TrieStringMatcher.java | 4 +-- .../apache/nutch/util/domain/DomainSuffixes.java | 2 +- .../webui/pages/components/ColorEnumLabel.java | 2 +- .../pages/components/ColorEnumLabelBuilder.java | 2 +- .../webui/pages/components/CpmIteratorAdapter.java | 2 +- .../nutch/webui/pages/crawls/CrawlPanel.java | 8 ++--- .../nutch/webui/pages/crawls/CrawlsPage.java | 4 +-- .../nutch/webui/pages/instances/InstancePanel.java | 2 +- .../nutch/webui/pages/instances/InstancesPage.java | 4 +-- .../nutch/webui/pages/seed/SeedListsPage.java | 4 +-- .../apache/nutch/webui/pages/seed/SeedPage.java | 6 ++-- .../nutch/webui/pages/settings/SettingsPage.java | 4 +-- .../elastic/TestElasticIndexWriter.java | 0 .../src/test/resources}/nutch-site-test.xml | 0 .../apache/nutch/protocol/http/api/HttpBase.java | 15 ++++++-- nutch-plugins/parsefilter-regex/README.txt | 41 ++++++++++++++++++++++ .../nutch/parsefilter/regex/RegexParseFilter.java | 18 ++++++---- nutch-plugins/pom.xml | 1 + .../apache/nutch/protocol/http/HttpResponse.java | 11 ++++-- nutch-plugins/protocol-httpclient/pom.xml | 20 +++++++++-- .../nutch/protocol/httpclient/HttpResponse.java | 7 ++++ .../publish-rabbitmq/build-ivy.xml | 0 .../publish-rabbitmq/build.xml | 0 .../publish-rabbitmq/ivy.xml | 0 .../publish-rabbitmq/plugin.xml | 0 .../pom.xml | 32 ++++++----------- .../publisher/rabbitmq/RabbitMQPublisherImpl.java | 0 .../nutch/publisher/rabbitmq/package-info.java | 0 .../urlnormalizer/basic/BasicURLNormalizer.java | 5 ++- .../basic/TestBasicURLNormalizer.java | 6 ++++ pom.xml | 20 +++++++++-- 113 files changed, 398 insertions(+), 348 deletions(-) -- To stop receiving notification emails like this one, please contact "[email protected]" <[email protected]>.
