Author: markus
Date: Thu Jul  4 08:50:25 2013
New Revision: 1499684

URL: http://svn.apache.org/r1499684
Log:
NUTCH-1600 Injector overwrite does not always work properly

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1499684&r1=1499683&r2=1499684&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Thu Jul  4 08:50:25 2013
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Nutch Development Trunk
 
+* NUTCH-1600 Injector overwrite does not always work properly (markus)
+
 * NUTCH-1581 CrawlDB csv output to include metadata (markus)
 
 * NUTCH-1327 QueryStringNormalizer (markus)

Modified: nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java?rev=1499684&r1=1499683&r2=1499684&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java Thu Jul  4 08:50:25 2013
@@ -186,6 +186,8 @@ public class Injector extends Configured
       scoreInjected = job.getFloat("db.score.injected", 1.0f);
       overwrite = job.getBoolean("db.injector.overwrite", false);
       update = job.getBoolean("db.injector.update", false);
+      LOG.info("Injector: overwrite: " + overwrite);
+      LOG.info("Injector: update: " + update);
     }
     
     public void close() {}
@@ -209,22 +211,20 @@ public class Injector extends Configured
           oldSet = true;
         }
       }
+
       CrawlDatum res = null;
+                
+      // Old default behaviour
+      if (injectedSet && !oldSet) {
+        res = injected;
+      } else {
+        res = old;
+      }
       
       /**
        * Whether to overwrite, ignore or update existing records
        * @see https://issues.apache.org/jira/browse/NUTCH-1405
        */
-      
-      // Injected record already exists and overwrite but not update
-      if (injectedSet && oldSet && overwrite) {
-        res = injected;
-        
-        if (update) {
-          LOG.info(key.toString() + " overwritten with injected record but update was specified.");
-        }
-      }
-
       // Injected record already exists and update but not overwrite
       if (injectedSet && oldSet && update && !overwrite) {
         res = old;
@@ -233,11 +233,9 @@ public class Injector extends Configured
         old.setFetchInterval(injected.getFetchInterval() != interval ? injected.getFetchInterval() : old.getFetchInterval());
       }
       
-      // Old default behaviour
-      if (injectedSet && !oldSet) {
+      // Injected record already exists and overwrite
+      if (injectedSet && oldSet && overwrite) {
         res = injected;
-      } else {
-        res = old;
       }
 
       output.collect(key, res);


Reply via email to