This is an automated email from the ASF dual-hosted git repository.

lewismc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git

commit cce1cf853c36ebd5cdf635fa80f4138778a7c943
Merge: 1f2c2ff 9e0c316
Author: Lewis John McGibbney <[email protected]>
AuthorDate: Wed Dec 20 19:41:39 2017 +0000

    Merge branch 'master' of https://github.com/apache/nutch

 build.xml                                          |   4 +
 conf/nutch-default.xml                             |  76 +++++-
 conf/regex-urlfilter.txt.template                  |   2 +-
 conf/tika-config.xml.template                      |  20 ++
 default.properties                                 |   1 +
 docker/Dockerfile                                  |  19 +-
 ivy/ivy.xml                                        |  29 ++-
 src/java/org/apache/nutch/crawl/CrawlDatum.java    |  18 +-
 src/java/org/apache/nutch/crawl/CrawlDb.java       |  12 +-
 src/java/org/apache/nutch/crawl/CrawlDbFilter.java |   5 +-
 src/java/org/apache/nutch/crawl/CrawlDbReader.java | 286 +++++++++------------
 src/java/org/apache/nutch/crawl/Generator.java     |   2 +-
 .../org/apache/nutch/fetcher/FetcherThread.java    | 115 +++++----
 .../nutch/indexer/IndexingFiltersChecker.java      | 125 ++-------
 .../org/apache/nutch/net/URLFilterChecker.java     | 129 ++++------
 src/java/org/apache/nutch/net/URLFilters.java      |   4 +
 .../org/apache/nutch/net/URLNormalizerChecker.java | 112 ++++----
 .../nutch/service/model/request/SeedUrl.java       |   8 -
 .../nutch/service/resources/SeedResource.java      |  11 -
 src/java/org/apache/nutch/tools/FileDumper.java    |   8 +-
 .../org/apache/nutch/util/AbstractChecker.java     | 171 ++++++++++++
 src/java/org/apache/nutch/util/DomUtil.java        |   9 +
 src/java/org/apache/nutch/webui/model/SeedUrl.java |   9 -
 src/plugin/build.xml                               |   2 +
 src/plugin/index-geoip/ivy.xml                     |   4 +-
 src/plugin/index-geoip/plugin.xml                  |  14 +-
 src/plugin/index-jexl-filter/build.xml             |  22 ++
 .../{indexer-elastic => index-jexl-filter}/ivy.xml |   4 +-
 .../{index-geoip => index-jexl-filter}/plugin.xml  |  34 +--
 .../nutch/indexer/jexl/JexlIndexingFilter.java     | 131 ++++++++++
 .../apache/nutch/indexer/jexl/package-info.java    |  30 +++
 .../nutch/indexer/jexl/TestJexlIndexingFilter.java | 124 +++++++++
 src/plugin/indexer-elastic-rest/plugin.xml         |   5 +-
 .../elasticrest/ElasticRestConstants.java          |   6 +-
 .../elasticrest/ElasticRestIndexWriter.java        |  61 ++++-
 src/plugin/indexer-elastic/ivy.xml                 |   6 +-
 src/plugin/indexer-elastic/plugin.xml              |  82 ++++--
 .../indexwriter/elastic/ElasticIndexWriter.java    |  11 +-
 .../elastic/TestElasticIndexWriter.java            |  13 +-
 .../analysis/lang/LanguageIndexingFilter.java      |   9 +
 .../indexer/filter/MimeTypeIndexingFilter.java     |   2 +-
 .../apache/nutch/parse/html/DOMContentUtils.java   |   7 +-
 .../org/apache/nutch/parse/html/HtmlParser.java    |  12 +-
 .../apache/nutch/parse/html/TestHtmlParser.java    |  26 +-
 src/plugin/parse-tika/ivy.xml                      |   5 +-
 src/plugin/parse-tika/plugin.xml                   |  98 ++++---
 .../apache/nutch/parse/tika/DOMContentUtils.java   |   7 +-
 .../org/apache/nutch/parse/tika/TikaParser.java    |  15 +-
 .../org/apache/nutch/tika}/TestHtmlParser.java     |  32 ++-
 49 files changed, 1227 insertions(+), 710 deletions(-)

-- 
To stop receiving notification emails like this one, please contact
"[email protected]" <[email protected]>.

Reply via email to