This is an automated email from the ASF dual-hosted git repository.

snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git


The following commit(s) were added to refs/heads/master by this push:
     new 98d02e70f NUTCH-2992 Fetcher: always block fetch queues when exceptions threshold is reached - if QueueFeeder is still alive, also block queues which are empty right now
98d02e70f is described below

commit 98d02e70f6d83f4fb99abf89a990a3e13a933076
Author: Sebastian Nagel <sna...@apache.org>
AuthorDate: Tue May 16 17:30:49 2023 +0200

    NUTCH-2992 Fetcher: always block fetch queues when exceptions threshold is reached
    - if QueueFeeder is still alive, also block queues which are empty right now
---
 .../org/apache/nutch/fetcher/FetchItemQueues.java  | 25 ++++++++++++----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/src/java/org/apache/nutch/fetcher/FetchItemQueues.java b/src/java/org/apache/nutch/fetcher/FetchItemQueues.java
index 9dfbeb277..cec272b45 100644
--- a/src/java/org/apache/nutch/fetcher/FetchItemQueues.java
+++ b/src/java/org/apache/nutch/fetcher/FetchItemQueues.java
@@ -303,19 +303,22 @@ public class FetchItemQueues {
           "* queue: {} >> delayed next fetch by {} ms after {} exceptions in 
queue",
           queueid, exceptionDelay, excCount);
     }
-    if (fiq.getQueueSize() == 0) {
-      return 0;
-    }
-    if (maxExceptions!= -1 && excCount >= maxExceptions) {
+    if (maxExceptions != -1 && excCount >= maxExceptions) {
       // too many exceptions for items in this queue - purge it
       int deleted = fiq.emptyQueue();
-      LOG.info(
-          "* queue: {} >> removed {} URLs from queue because {} exceptions 
occurred",
-          queueid, deleted, excCount);
-      totalSize.getAndAdd(-deleted);
-      // keep queue IDs to ensure that these queues aren't created and filled
-      // again, see addFetchItem(FetchItem)
-      queuesMaxExceptions.add(queueid);
+      if (deleted > 0) {
+        LOG.info(
+            "* queue: {} >> removed {} URLs from queue because {} exceptions 
occurred",
+            queueid, deleted, excCount);
+        totalSize.getAndAdd(-deleted);
+      }
+      if (feederAlive) {
+        LOG.info("* queue: {} >> blocked after {} exceptions", queueid,
+            excCount);
+        // keep queue IDs to ensure that these queues aren't created and filled
+        // again, see addFetchItem(FetchItem)
+        queuesMaxExceptions.add(queueid);
+      }
       return deleted;
     }
     return 0;

Reply via email to