This is an automated email from the ASF dual-hosted git repository.

snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git

commit bdbe7b330b5e7fd712f1b5126f69e2efebb194e8
Author: Sebastian Nagel <sebast...@commoncrawl.org>
AuthorDate: Tue May 3 17:14:03 2022 +0200

    NUTCH-2946 Fetcher: optionally slow down fetching from hosts with repeated exceptions
    - configure the delay in seconds as a float instead of milliseconds
    - use the value of fetcher.server.delay as default
    - double the delay with every observed exception (exponential backoff)
      but cap the growth at 2**31 to avoid overflows
---
 conf/nutch-default.xml                                | 14 +++++++++-----
 .../org/apache/nutch/fetcher/FetchItemQueues.java     | 19 +++++++++++++------
 2 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/conf/nutch-default.xml b/conf/nutch-default.xml
index 7775fc70d..7faa6fdcd 100644
--- a/conf/nutch-default.xml
+++ b/conf/nutch-default.xml
@@ -1091,11 +1091,15 @@
 
 <property>
   <name>fetcher.exceptions.per.queue.delay</name>
-  <value>-1</value>
-  <description>Additional delay in milliseconds slowing down fetches from a queue
-  if an exception has occurred (see also fetcher.max.exceptions.per.queue).
-  The delay grows logarithmically with the number of observed exceptions:
-     delay = fetcher.exceptions.per.queue.delay * log2(1 + num_exception_in_queue)
+  <value>${fetcher.server.delay}</value>
+  <description>Initial value (in seconds) of an additional dynamic
+  delay slowing down fetches from a queue after an exception has
+  occurred (see also fetcher.max.exceptions.per.queue). Starting with
+  the initial value the delay doubles with every observed exception:
+
+    delay = fetcher.exceptions.per.queue.delay * 2^num_exception_in_queue
+
+  An initial value of 0.0 disables this exponential backoff mechanism.
   </description>
 </property>
 
diff --git a/src/java/org/apache/nutch/fetcher/FetchItemQueues.java b/src/java/org/apache/nutch/fetcher/FetchItemQueues.java
index 0faf391ce..4914cd207 100644
--- a/src/java/org/apache/nutch/fetcher/FetchItemQueues.java
+++ b/src/java/org/apache/nutch/fetcher/FetchItemQueues.java
@@ -85,8 +85,8 @@ public class FetchItemQueues {
     this.timelimit = conf.getLong("fetcher.timelimit", -1);
     this.maxExceptionsPerQueue = conf.getInt(
         "fetcher.max.exceptions.per.queue", -1);
-    this.exceptionsPerQueueDelay = conf
-        .getLong("fetcher.exceptions.per.queue.delay", -1);
+    this.exceptionsPerQueueDelay = (long) (conf
+        .getFloat("fetcher.exceptions.per.queue.delay", .0f) * 1000);
 
     int dedupRedirMaxTime = conf.getInt("fetcher.redirect.dedupcache.seconds",
         -1);
@@ -272,10 +272,17 @@ public class FetchItemQueues {
       fiq.nextFetchTime.getAndAdd(delay);
       LOG.info("* queue: {} >> delayed next fetch by {} ms", queueid, delay);
     } else if (exceptionsPerQueueDelay > 0) {
-      // delay the next fetch by a time span growing at log scale
-      // with the number of observed exceptions
-      long exceptionDelay = (long) (exceptionsPerQueueDelay
-          * Math.log(1 + excCount) / Math.log(2));
+      /*
+       * Delay the next fetch by a time span growing exponentially with the
+       * number of observed exceptions. This dynamic delay is added to the
+       * constant delay. In order to avoid overflows, the exponential backoff is
+       * capped at 2**31
+       */
+      long exceptionDelay = exceptionsPerQueueDelay;
+      if (excCount > 1) {
+        // double the initial delay with every observed exception
+        exceptionDelay *= 2L << Math.min((excCount - 2), 31);
+      }
       fiq.nextFetchTime.getAndAdd(exceptionDelay);
       LOG.info(
           "* queue: {} >> delayed next fetch by {} ms after {} exceptions in queue",

Reply via email to