This is an automated email from the ASF dual-hosted git repository.
snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push:
new 5263b7cbe NUTCH-3096 HostDB ResolverThread can create too many job
counters (patch contributed by Markus Jelsma)
5263b7cbe is described below
commit 5263b7cbea0a50bf0bb3324f139f2ad3030f6875
Author: Sebastian Nagel <[email protected]>
AuthorDate: Wed Dec 4 16:11:11 2024 +0100
NUTCH-3096 HostDB ResolverThread can create too many job counters
(patch contributed by Markus Jelsma)
---
.../org/apache/nutch/hostdb/ResolverThread.java | 23 +++++++++++++++++++---
1 file changed, 20 insertions(+), 3 deletions(-)
diff --git a/src/java/org/apache/nutch/hostdb/ResolverThread.java b/src/java/org/apache/nutch/hostdb/ResolverThread.java
index 434e7bb31..c0a4f124b 100644
--- a/src/java/org/apache/nutch/hostdb/ResolverThread.java
+++ b/src/java/org/apache/nutch/hostdb/ResolverThread.java
@@ -114,15 +114,32 @@ public class ResolverThread implements Runnable {
}
}
- context.getCounter("UpdateHostDb",
- Long.toString(datum.numFailures()) + "_times_failed").increment(1);
+ context.getCounter("UpdateHostDb", createFailureCounterLabel(datum)).increment(1);
} catch (Exception ioe) {
LOG.warn(StringUtils.stringifyException(ioe));
}
} catch (Exception e) {
LOG.warn(StringUtils.stringifyException(e));
}
-
+
context.getCounter("UpdateHostDb", "checked_hosts").increment(1);
}
+
+ private String createFailureCounterLabel(HostDatum datum) {
+ // Hadoop will allow no more than 120 distinct counters. If we have a large
+ // number of distinct failures, we'll exceed the limit, Hadoop will complain,
+ // the job will fail. Let's limit the amount of possibilities by grouping
+ // the numFailures in buckets. NUTCH-3096
+ String label = null;
+ long n = datum.numFailures();
+ if (n < 4) {
+ label = Long.toString(n);
+ } else if (n > 3 && n < 11) {
+ label = "4-10";
+ } else {
+ label = ">10";
+ }
+
+ return label + "_times_failed";
+ }
}