This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git
commit cce1cf853c36ebd5cdf635fa80f4138778a7c943 Merge: 1f2c2ff 9e0c316 Author: Lewis John McGibbney <[email protected]> AuthorDate: Wed Dec 20 19:41:39 2017 +0000 Merge branch 'master' of https://github.com/apache/nutch build.xml | 4 + conf/nutch-default.xml | 76 +++++- conf/regex-urlfilter.txt.template | 2 +- conf/tika-config.xml.template | 20 ++ default.properties | 1 + docker/Dockerfile | 19 +- ivy/ivy.xml | 29 ++- src/java/org/apache/nutch/crawl/CrawlDatum.java | 18 +- src/java/org/apache/nutch/crawl/CrawlDb.java | 12 +- src/java/org/apache/nutch/crawl/CrawlDbFilter.java | 5 +- src/java/org/apache/nutch/crawl/CrawlDbReader.java | 286 +++++++++------------ src/java/org/apache/nutch/crawl/Generator.java | 2 +- .../org/apache/nutch/fetcher/FetcherThread.java | 115 +++++---- .../nutch/indexer/IndexingFiltersChecker.java | 125 ++------- .../org/apache/nutch/net/URLFilterChecker.java | 129 ++++------ src/java/org/apache/nutch/net/URLFilters.java | 4 + .../org/apache/nutch/net/URLNormalizerChecker.java | 112 ++++---- .../nutch/service/model/request/SeedUrl.java | 8 - .../nutch/service/resources/SeedResource.java | 11 - src/java/org/apache/nutch/tools/FileDumper.java | 8 +- .../org/apache/nutch/util/AbstractChecker.java | 171 ++++++++++++ src/java/org/apache/nutch/util/DomUtil.java | 9 + src/java/org/apache/nutch/webui/model/SeedUrl.java | 9 - src/plugin/build.xml | 2 + src/plugin/index-geoip/ivy.xml | 4 +- src/plugin/index-geoip/plugin.xml | 14 +- src/plugin/index-jexl-filter/build.xml | 22 ++ .../{indexer-elastic => index-jexl-filter}/ivy.xml | 4 +- .../{index-geoip => index-jexl-filter}/plugin.xml | 34 +-- .../nutch/indexer/jexl/JexlIndexingFilter.java | 131 ++++++++++ .../apache/nutch/indexer/jexl/package-info.java | 30 +++ .../nutch/indexer/jexl/TestJexlIndexingFilter.java | 124 +++++++++ src/plugin/indexer-elastic-rest/plugin.xml | 5 +- .../elasticrest/ElasticRestConstants.java | 6 +- .../elasticrest/ElasticRestIndexWriter.java | 61 ++++- src/plugin/indexer-elastic/ivy.xml | 6 +- src/plugin/indexer-elastic/plugin.xml | 82 ++++-- .../indexwriter/elastic/ElasticIndexWriter.java | 11 +- .../elastic/TestElasticIndexWriter.java | 13 +- .../analysis/lang/LanguageIndexingFilter.java | 9 + .../indexer/filter/MimeTypeIndexingFilter.java | 2 +- .../apache/nutch/parse/html/DOMContentUtils.java | 7 +- .../org/apache/nutch/parse/html/HtmlParser.java | 12 +- .../apache/nutch/parse/html/TestHtmlParser.java | 26 +- src/plugin/parse-tika/ivy.xml | 5 +- src/plugin/parse-tika/plugin.xml | 98 ++++--- .../apache/nutch/parse/tika/DOMContentUtils.java | 7 +- .../org/apache/nutch/parse/tika/TikaParser.java | 15 +- .../org/apache/nutch/tika}/TestHtmlParser.java | 32 ++- 49 files changed, 1227 insertions(+), 710 deletions(-) -- To stop receiving notification emails like this one, please contact "[email protected]" <[email protected]>.
