This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch 2.x in repository https://gitbox.apache.org/repos/asf/nutch.git
commit 5f6c383cc91f59ad28d9110efa4ad0109eda9117
Merge: 365077c b2130b4
Author: Lewis John McGibbney <[email protected]>
AuthorDate: Sun Jul 30 09:36:27 2017 -0700

    Merge pull request #192 from kaidul/NUTCH-2389

    NUTCH-2389 Precise data extractor implemented for 2.x

 build.xml | 5 +
 conf/jsoup-extractor-example.xml | 88 ++++++++++
 conf/jsoup-extractor.xml | 53 ++++++
 conf/nutch-default.xml | 9 ++
 src/plugin/build.xml | 3 +
 src/plugin/jsoup-extractor/build.xml | 28 ++++
 src/plugin/jsoup-extractor/ivy.xml | 40 +++++
 src/plugin/jsoup-extractor/plugin.xml | 56 +++++++
 .../nutch/core/jsoup/extractor/JsoupDocument.java | 127 +++++++++++++++
 .../core/jsoup/extractor/JsoupDocumentReader.java | 179 +++++++++++++++++++++
 .../jsoup/extractor/JsoupExtractorConstants.java | 36 +++++
 .../jsoup/extractor/normalizer/Normalizable.java | 22 +++
 .../normalizer/SimpleStringNormalizer.java | 31 ++++
 .../jsoup/extractor/normalizer/package-info.java | 22 +++
 .../nutch/core/jsoup/extractor/package-info.java | 22 +++
 .../jsoup/extractor/JsoupIndexingFilter.java | 85 ++++++++++
 .../indexer/jsoup/extractor/package-info.java | 22 +++
 .../parse/jsoup/extractor/JsoupHtmlParser.java | 118 ++++++++++++++
 .../nutch/parse/jsoup/extractor/package-info.java | 22 +++
 .../parse/jsoup/extractor/TestJsoupHtmlParser.java | 102 ++++++++++++
 .../parse/jsoup/extractor/ViewCountNormalizer.java | 30 ++++
 21 files changed, 1100 insertions(+)

--
To stop receiving notification emails like this one, please contact "[email protected]" <[email protected]>.
