This is an automated email from the ASF dual-hosted git repository.
lewismc pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git
commit a606dad64f3a3f2a4e288fcdd7521790b88827c5 Merge: bc3c1ad 55c7f75 Author: Lewis John McGibbney <[email protected]> AuthorDate: Sun Dec 3 00:47:29 2017 -0800 Merge branch 'master' of https://github.com/apache/nutch build.xml | 9 +- conf/nutch-default.xml | 35 ++++++ default.properties | 1 + src/java/org/apache/nutch/crawl/CrawlDatum.java | 4 + src/java/org/apache/nutch/crawl/CrawlDb.java | 1 + src/java/org/apache/nutch/crawl/CrawlDbFilter.java | 18 ++- src/java/org/apache/nutch/crawl/CrawlDbReader.java | 26 ++++- .../org/apache/nutch/crawl/CrawlDbReducer.java | 13 ++- src/java/org/apache/nutch/crawl/Generator.java | 16 ++- src/java/org/apache/nutch/crawl/Injector.java | 52 +++++++-- .../org/apache/nutch/crawl/URLPartitioner.java | 14 ++- .../org/apache/nutch/fetcher/FetchItemQueue.java | 6 + .../org/apache/nutch/fetcher/FetcherThread.java | 41 ++++--- src/java/org/apache/nutch/hostdb/ReadHostDb.java | 23 +++- .../org/apache/nutch/net/URLFilterChecker.java | 3 +- .../org/apache/nutch/parse/ParseOutputFormat.java | 36 ++++-- .../org/apache/nutch/scoring/ScoringFilter.java | 16 ++- .../org/apache/nutch/scoring/ScoringFilters.java | 8 ++ .../apache/nutch/tools/CommonCrawlDataDumper.java | 4 +- .../apache/nutch/util/CrawlCompletionStats.java | 13 ++- .../nutch/util/ProtocolStatusStatistics.java | 13 ++- .../org/apache/nutch/util/SitemapProcessor.java | 27 ++++- .../apache/nutch/util/domain/DomainStatistics.java | 13 ++- src/plugin/build.xml | 3 + src/plugin/headings/ivy.xml | 1 + .../nutch/parse/headings/HeadingsParseFilter.java | 9 +- .../parse/headings/TestHeadingsParseFilter.java | 51 ++++++++ .../nutch/indexer/more/MoreIndexingFilter.java | 2 +- .../indexwriter/elastic/ElasticIndexWriter.java | 2 +- .../apache/nutch/protocol/http/api/HttpBase.java | 4 +- .../apache/nutch/parse/html/DOMContentUtils.java | 12 +- .../nutch/parse/html/TestDOMContentUtils.java | 16 ++- .../apache/nutch/parse/tika/DOMContentUtils.java | 38 ++++-- 
.../org/apache/nutch/parse/tika/TikaParser.java | 2 +- .../org/apache/nutch/tika/TestDOMContentUtils.java | 15 ++- .../org/apache/nutch/protocol/ftp/FtpResponse.java | 2 + .../protocol/interactiveselenium/HttpResponse.java | 2 +- .../handlers/DefalultMultiInteractionHandler.java | 2 +- .../handlers/DefaultClickAllAjaxLinksHandler.java | 2 +- .../handlers/DefaultHandler.java | 2 +- .../handlers/InteractiveSeleniumHandler.java | 2 +- src/plugin/scoring-orphan/build.xml | 27 +++++ src/plugin/{headings => scoring-orphan}/ivy.xml | 2 +- src/plugin/scoring-orphan/plugin.xml | 38 ++++++ .../nutch/scoring/orphan/OrphanScoringFilter.java | 107 +++++++++++++++++ .../apache/nutch/scoring/orphan/package-info.java} | 14 +-- .../scoring/orphan/TestOrphanScoringFilter.java | 128 +++++++++++++++++++++ .../urlnormalizer/basic/BasicURLNormalizer.java | 2 +- .../basic/TestBasicURLNormalizer.java | 8 +- .../net/urlnormalizer/host/HostURLNormalizer.java | 2 +- .../protocol/ProtocolURLNormalizer.java | 2 +- .../urlnormalizer/slash/SlashURLNormalizer.java | 2 +- .../org/apache/nutch/crawl/TestCrawlDbStates.java | 10 +- 53 files changed, 767 insertions(+), 134 deletions(-) -- To stop receiving notification emails like this one, please contact "[email protected]" <[email protected]>.
