This is an automated email from the ASF dual-hosted git repository.
snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push:
new 87c7a2e Fix for NUTCH-2494 contributed by Ashraful Islam, closes #274
87c7a2e is described below
commit 87c7a2e57bdde875a0ab36f8586fb607970a4e1d
Author: Md Ashraful Islam <[email protected]>
AuthorDate: Thu Jan 11 17:16:38 2018 +0600
Fix for NUTCH-2494 contributed by Ashraful Islam, closes #274
---
src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java | 13 ++++++++-----
src/java/org/apache/nutch/parse/ParseOutputFormat.java | 12 +++++++-----
2 files changed, 15 insertions(+), 10 deletions(-)
diff --git a/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java b/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java
index d526a07..121a825 100644
--- a/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java
+++ b/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java
@@ -44,16 +44,19 @@ import org.apache.nutch.protocol.Content;
/** Splits FetcherOutput entries into multiple map files. */
public class FetcherOutputFormat implements OutputFormat<Text, NutchWritable> {
- public void checkOutputSpecs(FileSystem fs, JobConf job) throws IOException {
+ public void checkOutputSpecs(FileSystem ignored, JobConf job)
+ throws IOException {
Path out = FileOutputFormat.getOutputPath(job);
if ((out == null) && (job.getNumReduceTasks() != 0)) {
throw new InvalidJobConfException("Output directory not set in JobConf.");
}
- if (fs == null) {
- fs = out.getFileSystem(job);
+
+ if (out != null) {
+ FileSystem fs = out.getFileSystem(job);
+ if (fs.exists(new Path(out, CrawlDatum.FETCH_DIR_NAME))) {
+ throw new IOException("Segment already fetched!");
+ }
}
- if (fs.exists(new Path(out, CrawlDatum.FETCH_DIR_NAME)))
- throw new IOException("Segment already fetched!");
}
public RecordWriter<Text, NutchWritable> getRecordWriter(final FileSystem fs,
diff --git a/src/java/org/apache/nutch/parse/ParseOutputFormat.java b/src/java/org/apache/nutch/parse/ParseOutputFormat.java
index 2c8396a..722ad7d 100644
--- a/src/java/org/apache/nutch/parse/ParseOutputFormat.java
+++ b/src/java/org/apache/nutch/parse/ParseOutputFormat.java
@@ -79,16 +79,18 @@ public class ParseOutputFormat implements OutputFormat<Text, Parse> {
}
}
- public void checkOutputSpecs(FileSystem fs, JobConf job) throws IOException {
+ public void checkOutputSpecs(FileSystem ignored, JobConf job)
+ throws IOException {
Path out = FileOutputFormat.getOutputPath(job);
if ((out == null) && (job.getNumReduceTasks() != 0)) {
throw new InvalidJobConfException("Output directory not set in JobConf.");
}
- if (fs == null) {
- fs = out.getFileSystem(job);
+
+ if (out != null) {
+ FileSystem fs = out.getFileSystem(job);
+ if (fs.exists(new Path(out, CrawlDatum.PARSE_DIR_NAME)))
+ throw new IOException("Segment already parsed!");
}
- if (fs.exists(new Path(out, CrawlDatum.PARSE_DIR_NAME)))
- throw new IOException("Segment already parsed!");
}
public RecordWriter<Text, Parse> getRecordWriter(FileSystem fs, JobConf job,
--
To stop receiving notification emails like this one, please contact
[email protected].