This is an automated email from the ASF dual-hosted git repository. snagel pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git
commit f356790764e3685591daad4ffc8c430d4c16ab07
Merge: bd8c847 e5a11a9
Author: Sebastian Nagel <[email protected]>
AuthorDate: Wed Oct 25 16:50:11 2017 +0200

    Merge pull request #211 from sebastian-nagel/NUTCH-1932

    NUTCH-1932 Automatically remove orphaned pages

 build.xml | 4 +
 conf/nutch-default.xml | 28 +++++
 default.properties | 1 +
 src/java/org/apache/nutch/crawl/CrawlDatum.java | 4 +
 src/java/org/apache/nutch/crawl/CrawlDb.java | 1 +
 src/java/org/apache/nutch/crawl/CrawlDbFilter.java | 18 ++-
 .../org/apache/nutch/crawl/CrawlDbReducer.java | 13 ++-
 .../org/apache/nutch/scoring/ScoringFilter.java | 16 ++-
 .../org/apache/nutch/scoring/ScoringFilters.java | 8 ++
 src/plugin/build.xml | 3 +
 src/plugin/scoring-orphan/build.xml | 27 +++++
 src/plugin/scoring-orphan/ivy.xml | 41 +++++++
 src/plugin/scoring-orphan/plugin.xml | 38 ++++++
 .../nutch/scoring/orphan/OrphanScoringFilter.java | 107 +++++++++++++++++
 .../apache/nutch/scoring/orphan/package-info.java | 23 ++++
 .../scoring/orphan/TestOrphanScoringFilter.java | 128 +++++++++++++++++++++
 .../org/apache/nutch/crawl/TestCrawlDbStates.java | 10 +-
 17 files changed, 457 insertions(+), 13 deletions(-)

--
To stop receiving notification emails like this one, please contact
"[email protected]" <[email protected]>.
