Author: markus
Date: Thu Jul 4 08:50:25 2013
New Revision: 1499684
URL: http://svn.apache.org/r1499684
Log:
NUTCH-1600 Injector overwrite does not always work properly
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1499684&r1=1499683&r2=1499684&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Thu Jul 4 08:50:25 2013
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Development Trunk
+* NUTCH-1600 Injector overwrite does not always work properly (markus)
+
* NUTCH-1581 CrawlDB csv output to include metadata (markus)
* NUTCH-1327 QueryStringNormalizer (markus)
Modified: nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java?rev=1499684&r1=1499683&r2=1499684&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java Thu Jul 4 08:50:25 2013
@@ -186,6 +186,8 @@ public class Injector extends Configured
scoreInjected = job.getFloat("db.score.injected", 1.0f);
overwrite = job.getBoolean("db.injector.overwrite", false);
update = job.getBoolean("db.injector.update", false);
+ LOG.info("Injector: overwrite: " + overwrite);
+ LOG.info("Injector: update: " + update);
}
public void close() {}
@@ -209,22 +211,20 @@ public class Injector extends Configured
oldSet = true;
}
}
+
CrawlDatum res = null;
+
+ // Old default behaviour
+ if (injectedSet && !oldSet) {
+ res = injected;
+ } else {
+ res = old;
+ }
/**
* Whether to overwrite, ignore or update existing records
* @see https://issues.apache.org/jira/browse/NUTCH-1405
*/
-
- // Injected record already exists and overwrite but not update
- if (injectedSet && oldSet && overwrite) {
- res = injected;
-
- if (update) {
- LOG.info(key.toString() + " overwritten with injected record but update was specified.");
- }
- }
-
// Injected record already exists and update but not overwrite
if (injectedSet && oldSet && update && !overwrite) {
res = old;
@@ -233,11 +233,9 @@ public class Injector extends Configured
old.setFetchInterval(injected.getFetchInterval() != interval ?
injected.getFetchInterval() : old.getFetchInterval());
}
- // Old default behaviour
- if (injectedSet && !oldSet) {
+ // Injected record already exists and overwrite
+ if (injectedSet && oldSet && overwrite) {
res = injected;
- } else {
- res = old;
}
output.collect(key, res);