This is an automated email from the ASF dual-hosted git repository.

snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git


The following commit(s) were added to refs/heads/master by this push:
     new 5f1330a03 NUTCH-3043 Generator: count URLs rejected by URL filters (#814)
5f1330a03 is described below

commit 5f1330a03d136440a167a85da6cfe8ac4b3f61b9
Author: Sebastian Nagel <[email protected]>
AuthorDate: Tue May 14 17:38:25 2024 +0200

    NUTCH-3043 Generator: count URLs rejected by URL filters (#814)
    
    - add counters URL_FILTERS_REJECTED and URL_FILTER_EXCEPTION
    - simplify logging statement
    - remove unnecessary cast
    - use parameterized logging
---
 src/java/org/apache/nutch/crawl/Generator.java | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/java/org/apache/nutch/crawl/Generator.java b/src/java/org/apache/nutch/crawl/Generator.java
index 33f743a37..f57642a65 100644
--- a/src/java/org/apache/nutch/crawl/Generator.java
+++ b/src/java/org/apache/nutch/crawl/Generator.java
@@ -224,9 +224,12 @@ public class Generator extends NutchTool implements Tool {
         // If filtering is on don't generate URLs that don't pass
         // URLFilters
         try {
-          if (filters.filter(url.toString()) == null)
+          if (filters.filter(url.toString()) == null) {
+            context.getCounter("Generator", "URL_FILTERS_REJECTED").increment(1);
             return;
+          }
         } catch (URLFilterException e) {
+          context.getCounter("Generator", "URL_FILTER_EXCEPTION").increment(1);
           LOG.warn("Couldn't filter url: {} ({})", url, e.getMessage());
         }
       }
@@ -253,10 +256,7 @@ public class Generator extends NutchTool implements Tool {
       try {
         sort = scfilters.generatorSortValue(key, crawlDatum, sort);
       } catch (ScoringFilterException sfe) {
-        if (LOG.isWarnEnabled()) {
-          LOG.warn(
-              "Couldn't filter generatorSortValue for " + key + ": " + sfe);
-        }
+        LOG.warn("Couldn't filter generatorSortValue for {}: {}", key, sfe);
       }
 
       // check expr
@@ -625,7 +625,7 @@ public class Generator extends NutchTool implements Tool {
       // make later bytes more significant in hash code, so that sorting
       // by hashcode correlates less with by-host ordering.
       for (int i = length - 1; i >= 0; i--)
-        hash = (31 * hash) + (int) bytes[start + i];
+        hash = (31 * hash) + bytes[start + i];
       return hash;
     }
   }

Reply via email to