Author: lewismc
Date: Thu Jun 14 12:34:08 2012
New Revision: 1350213
URL: http://svn.apache.org/viewvc?rev=1350213&view=rev
Log:
-force and -resume arguments being ignored in ParserJob NUTCH-1392
Modified:
nutch/branches/nutchgora/CHANGES.txt
nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherJob.java
nutch/branches/nutchgora/src/java/org/apache/nutch/parse/ParserJob.java
Modified: nutch/branches/nutchgora/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1350213&r1=1350212&r2=1350213&view=diff
==============================================================================
--- nutch/branches/nutchgora/CHANGES.txt (original)
+++ nutch/branches/nutchgora/CHANGES.txt Thu Jun 14 12:34:08 2012
@@ -3,6 +3,8 @@ Nutch Change Log
Release 2.0 (08/06/2012) ddmmyyy
Full Jira report -
https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=10680&version=12314893
+* NUTCH-1392 -force and -resume arguments being ignored in ParserJob (ferdy via lewismc)
+
* NUTCH-1379 NPE when reprUrl is null in ParseUtil (ferdy)
* NUTCH-1378 HostDb NullPointerException (ferdy)
Modified:
nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherJob.java
URL:
http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherJob.java?rev=1350213&r1=1350212&r2=1350213&view=diff
==============================================================================
--- nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherJob.java (original)
+++ nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherJob.java Thu Jun 14 12:34:08 2012
@@ -265,7 +265,7 @@ public class FetcherJob extends NutchToo
String batchId;
String usage = "Usage: FetcherJob (<batchId> | -all) [-crawlId <id>] " +
- "[-threads N] [-parse] \n \t \t [-resume] [-numTasks N]\n" +
+ "[-threads N] \n \t \t [-resume] [-numTasks N]\n" +
" <batchId> - crawl identifier returned by Generator, or -all for
all \n \t \t generated batchId-s\n" +
" -crawlId <id> - the id to prefix the schemas to operate on, \n \t
\t (default: storage.crawl.id)\n" +
" -threads N - number of fetching threads per task\n" +
Modified:
nutch/branches/nutchgora/src/java/org/apache/nutch/parse/ParserJob.java
URL:
http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/org/apache/nutch/parse/ParserJob.java?rev=1350213&r1=1350212&r2=1350213&view=diff
==============================================================================
--- nutch/branches/nutchgora/src/java/org/apache/nutch/parse/ParserJob.java (original)
+++ nutch/branches/nutchgora/src/java/org/apache/nutch/parse/ParserJob.java Thu Jun 14 12:34:08 2012
@@ -231,6 +231,13 @@ public class ParserJob extends NutchTool
if (force != null) {
getConf().setBoolean(FORCE_KEY, force);
}
+ LOG.info("ParserJob: resuming:\t" + getConf().getBoolean(RESUME_KEY, false));
+ LOG.info("ParserJob: forced reparse:\t" + getConf().getBoolean(FORCE_KEY, false));
+ if (batchId == null || batchId.equals(Nutch.ALL_BATCH_ID_STR)) {
+ LOG.info("ParserJob: parsing all");
+ } else {
+ LOG.info("ParserJob: batchId:\t" + batchId);
+ }
currentJob = new NutchJob(getConf(), "parse");
Collection<WebPage.Field> fields = getFields(currentJob);
@@ -247,13 +254,6 @@ public class ParserJob extends NutchTool
public int parse(String batchId, boolean shouldResume, boolean force) throws Exception {
LOG.info("ParserJob: starting");
- LOG.info("ParserJob: resuming:\t" + getConf().getBoolean(RESUME_KEY, false));
- LOG.info("ParserJob: forced reparse:\t" + getConf().getBoolean(FORCE_KEY, false));
- if (batchId == null || batchId.equals(Nutch.ALL_BATCH_ID_STR)) {
- LOG.info("ParserJob: parsing all");
- } else {
- LOG.info("ParserJob: batchId:\t" + batchId);
- }
run(ToolUtil.toArgMap(
Nutch.ARG_BATCH, batchId,
Nutch.ARG_RESUME, shouldResume,