This is an automated email from the ASF dual-hosted git repository.

snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push:
     new 010c2fc  NUTCH-2691: Improve logging from scoring-depth plugin
     new cb580f0  Merge pull request #434 from YossiTamari/patch-3

010c2fc is described below

commit 010c2fc8035525545812ae8acfbeeda1a8bbb96b
Author: YossiTamari <33034182+yossitam...@users.noreply.github.com>
AuthorDate: Tue Jan 22 18:03:45 2019 +0200

    NUTCH-2691: Improve logging from scoring-depth plugin

    Exit distributeScoreToOutlinks immediately if there are no outlinks. This is a very small performance improvement, but more importantly it prevents the plugin from emitting a "Missing depth, removing all outlinks from url" warn message for every page that failed parsing.
---
 .../src/java/org/apache/nutch/scoring/depth/DepthScoringFilter.java | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/plugin/scoring-depth/src/java/org/apache/nutch/scoring/depth/DepthScoringFilter.java b/src/plugin/scoring-depth/src/java/org/apache/nutch/scoring/depth/DepthScoringFilter.java
index 07e0e3f..c016030 100644
--- a/src/plugin/scoring-depth/src/java/org/apache/nutch/scoring/depth/DepthScoringFilter.java
+++ b/src/plugin/scoring-depth/src/java/org/apache/nutch/scoring/depth/DepthScoringFilter.java
@@ -73,6 +73,9 @@ public class DepthScoringFilter extends Configured implements ScoringFilter {
   public CrawlDatum distributeScoreToOutlinks(Text fromUrl, ParseData parseData, Collection<Entry<Text, CrawlDatum>> targets, CrawlDatum adjust, int allCount) throws ScoringFilterException {
+    if (targets.isEmpty()) {
+      return adjust;
+    }
     String depthString = parseData.getMeta(DEPTH_KEY);
     if (depthString == null) {
       LOG.warn("Missing depth, removing all outlinks from url " + fromUrl);