This is an automated email from the ASF dual-hosted git repository.

snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git

commit 814f8b997ddbdc670d7ffba59ee33f1b3cc3ab96
Author: Sebastian Nagel <sna...@apache.org>
AuthorDate: Thu Apr 23 15:55:32 2020 +0200

    NUTCH-1194 Generator: CrawlDB lock should be released earlier
    - release the CrawlDb lock right after the select step if generated
      items are not marked in CrawlDb (generate.update.crawldb is false)
---
 src/java/org/apache/nutch/crawl/Generator.java | 18 +++++++++++++-----
 src/java/org/apache/nutch/util/NutchJob.java   | 14 ++++++++++++--
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/src/java/org/apache/nutch/crawl/Generator.java b/src/java/org/apache/nutch/crawl/Generator.java
index 5dcd2ea..04c2ae8 100644
--- a/src/java/org/apache/nutch/crawl/Generator.java
+++ b/src/java/org/apache/nutch/crawl/Generator.java
@@ -841,6 +841,14 @@ public class Generator extends NutchTool implements Tool {
           String.format(Locale.ROOT, "%6d", counter.getValue()),
           counter.getName());
     }
+    if (!getConf().getBoolean(GENERATE_UPDATE_CRAWLDB, false)) {
+      /*
+       * generated items are not marked in CrawlDb, and CrawlDb will not
+       * be accessed anymore: we can already release the lock
+       */
+      LockUtil.removeLockFile(getConf(), lock);
+      lock = null;
+    }
 
     // read the subdirectories generated in the temp
     // output and turn them into segments
@@ -858,15 +866,13 @@ public class Generator extends NutchTool implements Tool {
       }
     } catch (Exception e) {
       LOG.warn("Generator: exception while partitioning segments, exiting ...");
-      LockUtil.removeLockFile(getConf(), lock);
-      fs.delete(tempDir, true);
+      NutchJob.cleanupAfterFailure(tempDir, lock, fs);
       return null;
     }
 
     if (generatedSegments.size() == 0) {
       LOG.warn("Generator: 0 records selected for fetching, exiting ...");
-      LockUtil.removeLockFile(getConf(), lock);
-      fs.delete(tempDir, true);
+      NutchJob.cleanupAfterFailure(tempDir, lock, fs);
       return null;
     }
 
@@ -913,7 +919,9 @@ public class Generator extends NutchTool implements Tool {
       fs.delete(tempDir2, true);
     }
 
-    LockUtil.removeLockFile(getConf(), lock);
+    if (lock != null) {
+      LockUtil.removeLockFile(getConf(), lock);
+    }
     fs.delete(tempDir, true);
 
     long end = System.currentTimeMillis();
diff --git a/src/java/org/apache/nutch/util/NutchJob.java b/src/java/org/apache/nutch/util/NutchJob.java
index 991e506..13257d2 100644
--- a/src/java/org/apache/nutch/util/NutchJob.java
+++ b/src/java/org/apache/nutch/util/NutchJob.java
@@ -41,7 +41,15 @@ public class NutchJob extends Job {
     return Job.getInstance(conf);
   } 
 
-  /*
+  /**
+   * Clean up the file system in case of a job failure.
+   */
+  public static void cleanupAfterFailure(Path tempDir, FileSystem fs)
+      throws IOException {
+    cleanupAfterFailure(tempDir, null, fs);
+  }
+
+  /**
    * Clean up the file system in case of a job failure.
    */
   public static void cleanupAfterFailure(Path tempDir, Path lock, FileSystem fs)
@@ -50,7 +58,9 @@ public class NutchJob extends Job {
       if (fs.exists(tempDir)) {
         fs.delete(tempDir, true);
       }
-      LockUtil.removeLockFile(fs, lock);
+      if (lock != null) {
+        LockUtil.removeLockFile(fs, lock);
+      }
     } catch (IOException e) {
       LOG.error("NutchJob cleanup failed: {}", e.getMessage());
       throw e;

Reply via email to