This is an automated email from the ASF dual-hosted git repository.

snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git


The following commit(s) were added to refs/heads/master by this push:
     new 5263b7cbe NUTCH-3096 HostDB ResolverThread can create too many job counters (patch contributed by Markus Jelsma)
5263b7cbe is described below

commit 5263b7cbea0a50bf0bb3324f139f2ad3030f6875
Author: Sebastian Nagel <[email protected]>
AuthorDate: Wed Dec 4 16:11:11 2024 +0100

    NUTCH-3096 HostDB ResolverThread can create too many job counters
    (patch contributed by Markus Jelsma)
---
 .../org/apache/nutch/hostdb/ResolverThread.java    | 23 +++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/src/java/org/apache/nutch/hostdb/ResolverThread.java b/src/java/org/apache/nutch/hostdb/ResolverThread.java
index 434e7bb31..c0a4f124b 100644
--- a/src/java/org/apache/nutch/hostdb/ResolverThread.java
+++ b/src/java/org/apache/nutch/hostdb/ResolverThread.java
@@ -114,15 +114,32 @@ public class ResolverThread implements Runnable {
           }
         }
 
-        context.getCounter("UpdateHostDb",
-          Long.toString(datum.numFailures()) + "_times_failed").increment(1);
+        context.getCounter("UpdateHostDb", createFailureCounterLabel(datum)).increment(1);
       } catch (Exception ioe) {
         LOG.warn(StringUtils.stringifyException(ioe));
       }
     } catch (Exception e) {
       LOG.warn(StringUtils.stringifyException(e));
     }
-    
+
     context.getCounter("UpdateHostDb", "checked_hosts").increment(1);
   }
+
+  private String createFailureCounterLabel(HostDatum datum) {
+    // Hadoop will allow no more than 120 distinct counters. If we have a large
+    // number of distinct failures, we'll exceed the limit, Hadoop will complain,
+    // the job will fail. Let's limit the amount of possibilities by grouping
+    // the numFailures in buckets. NUTCH-3096
+    String label = null;
+    long n = datum.numFailures();
+    if (n < 4) {
+      label = Long.toString(n);
+    } else if (n > 3 && n < 11) {
+      label = "4-10";
+    } else {
+      label = ">10";
+    }
+
+    return label + "_times_failed";
+  }
 }

Reply via email to