Author: markus
Date: Thu Jul 4 08:50:25 2013
New Revision: 1499684

URL: http://svn.apache.org/r1499684
Log:
NUTCH-1600 Injector overwrite does not always work properly

Modified: nutch/trunk/CHANGES.txt nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java Modified: nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1499684&r1=1499683&r2=1499684&view=diff ============================================================================== --- nutch/trunk/CHANGES.txt (original) +++ nutch/trunk/CHANGES.txt Thu Jul 4 08:50:25 2013 @@ -2,6 +2,8 @@ Nutch Change Log Nutch Development Trunk +* NUTCH-1600 Injector overwrite does not always work properly (markus) + * NUTCH-1581 CrawlDB csv output to include metadata (markus) * NUTCH-1327 QueryStringNormalizer (markus) Modified: nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java?rev=1499684&r1=1499683&r2=1499684&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java (original) +++ nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java Thu Jul 4 08:50:25 2013 @@ -186,6 +186,8 @@ public class Injector extends Configured scoreInjected = job.getFloat("db.score.injected", 1.0f); overwrite = job.getBoolean("db.injector.overwrite", false); update = job.getBoolean("db.injector.update", false); + LOG.info("Injector: overwrite: " + overwrite); + LOG.info("Injector: update: " + update); } public void close() {} @@ -209,22 +211,20 @@ public class Injector extends Configured oldSet = true; } } + CrawlDatum res = null; + + // Old default behaviour + if (injectedSet && !oldSet) { + res = injected; + } else { + res = old; + } /** * Whether to overwrite, ignore or update existing records * @see https://issues.apache.org/jira/browse/NUTCH-1405 */ - - // Injected record already exists and overwrite but not update - if (injectedSet && oldSet && overwrite) { - res = injected; - - if (update) { - LOG.info(key.toString() + " overwritten with injected record but update was specified."); 
- } - } - // Injected record already exists and update but not overwrite if (injectedSet && oldSet && update && !overwrite) { res = old; @@ -233,11 +233,9 @@ public class Injector extends Configured old.setFetchInterval(injected.getFetchInterval() != interval ? injected.getFetchInterval() : old.getFetchInterval()); } - // Old default behaviour - if (injectedSet && !oldSet) { + // Injected record already exists and overwrite + if (injectedSet && oldSet && overwrite) { res = injected; - } else { - res = old; } output.collect(key, res);