This is an automated email from the ASF dual-hosted git repository.

snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git


The following commit(s) were added to refs/heads/master by this push:
     new 87c7a2e  Fix for NUTCH-2494 contributed by Ashraful Islam, closes #274
87c7a2e is described below

commit 87c7a2e57bdde875a0ab36f8586fb607970a4e1d
Author: Md Ashraful Islam <[email protected]>
AuthorDate: Thu Jan 11 17:16:38 2018 +0600

    Fix for NUTCH-2494 contributed by Ashraful Islam, closes #274
---
 src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java | 13 ++++++++-----
 src/java/org/apache/nutch/parse/ParseOutputFormat.java     | 12 +++++++-----
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java b/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java
index d526a07..121a825 100644
--- a/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java
+++ b/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java
@@ -44,16 +44,19 @@ import org.apache.nutch.protocol.Content;
 /** Splits FetcherOutput entries into multiple map files. */
 public class FetcherOutputFormat implements OutputFormat<Text, NutchWritable> {
 
-  public void checkOutputSpecs(FileSystem fs, JobConf job) throws IOException {
+  public void checkOutputSpecs(FileSystem ignored, JobConf job)
+      throws IOException {
     Path out = FileOutputFormat.getOutputPath(job);
     if ((out == null) && (job.getNumReduceTasks() != 0)) {
       throw new InvalidJobConfException("Output directory not set in JobConf.");
     }
-    if (fs == null) {
-      fs = out.getFileSystem(job);
+
+    if (out != null) {
+      FileSystem fs = out.getFileSystem(job);
+      if (fs.exists(new Path(out, CrawlDatum.FETCH_DIR_NAME))) {
+        throw new IOException("Segment already fetched!");
+      }
     }
-    if (fs.exists(new Path(out, CrawlDatum.FETCH_DIR_NAME)))
-      throw new IOException("Segment already fetched!");
   }
 
   public RecordWriter<Text, NutchWritable> getRecordWriter(final FileSystem fs,
diff --git a/src/java/org/apache/nutch/parse/ParseOutputFormat.java 
b/src/java/org/apache/nutch/parse/ParseOutputFormat.java
index 2c8396a..722ad7d 100644
--- a/src/java/org/apache/nutch/parse/ParseOutputFormat.java
+++ b/src/java/org/apache/nutch/parse/ParseOutputFormat.java
@@ -79,16 +79,18 @@ public class ParseOutputFormat implements OutputFormat<Text, Parse> {
     }
   }
 
-  public void checkOutputSpecs(FileSystem fs, JobConf job) throws IOException {
+  public void checkOutputSpecs(FileSystem ignored, JobConf job)
+      throws IOException {
     Path out = FileOutputFormat.getOutputPath(job);
     if ((out == null) && (job.getNumReduceTasks() != 0)) {
       throw new InvalidJobConfException("Output directory not set in JobConf.");
     }
-    if (fs == null) {
-      fs = out.getFileSystem(job);
+
+    if (out != null) {
+      FileSystem fs = out.getFileSystem(job);
+      if (fs.exists(new Path(out, CrawlDatum.PARSE_DIR_NAME)))
+        throw new IOException("Segment already parsed!");
     }
-    if (fs.exists(new Path(out, CrawlDatum.PARSE_DIR_NAME)))
-      throw new IOException("Segment already parsed!");
   }
 
   public RecordWriter<Text, Parse> getRecordWriter(FileSystem fs, JobConf job,

-- 
To stop receiving notification emails like this one, please contact
[email protected].

Reply via email to