Author: snagel
Date: Thu May 14 10:08:15 2015
New Revision: 1679335

URL: http://svn.apache.org/r1679335
Log:
NUTCH-2008 IndexerMapReduce to use single instance of NutchIndexAction for 
deletions

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/indexer/IndexerMapReduce.java

Modified: nutch/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1679335&r1=1679334&r2=1679335&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Thu May 14 10:08:15 2015
@@ -2,6 +2,8 @@ Nutch Change Log
  
 Nutch Current Development 1.11-SNAPSHOT
 
+* NUTCH-2008 IndexerMapReduce to use single instance of NutchIndexAction for deletions (snagel)
+
 * NUTCH-1998 Add support for user-defined file extension to CommonCrawlDataDumper (totaro via mattmann)
 
 * NUTCH-1873 Solr IndexWriter/Job to report number of docs indexed. (snagel via lewismc)

Modified: nutch/trunk/src/java/org/apache/nutch/indexer/IndexerMapReduce.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/indexer/IndexerMapReduce.java?rev=1679335&r1=1679334&r2=1679335&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/indexer/IndexerMapReduce.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/indexer/IndexerMapReduce.java Thu May 14 10:08:15 2015
@@ -77,6 +77,10 @@ public class IndexerMapReduce extends Co
   private URLNormalizers urlNormalizers;
   private URLFilters urlFilters;
 
+  /** Predefined action to delete documents from the index */
+  private static final NutchIndexAction DELETE_ACTION = new NutchIndexAction(
+      null, NutchIndexAction.DELETE);
+
   public void configure(JobConf job) {
     setConf(job);
     this.filters = new IndexingFilters(getConf());
@@ -206,9 +210,7 @@ public class IndexerMapReduce extends Co
           if (robotsMeta != null
               && robotsMeta.toLowerCase().indexOf("noindex") != -1) {
             // Delete it!
-            NutchIndexAction action = new NutchIndexAction(null,
-                NutchIndexAction.DELETE);
-            output.collect(key, action);
+            output.collect(key, DELETE_ACTION);
             reporter.incrCounter("IndexerStatus", "deleted (robots=noindex)", 1);
             return;
           }
@@ -225,10 +227,7 @@ public class IndexerMapReduce extends Co
       if (fetchDatum.getStatus() == CrawlDatum.STATUS_FETCH_GONE
           || dbDatum.getStatus() == CrawlDatum.STATUS_DB_GONE) {
         reporter.incrCounter("IndexerStatus", "deleted (gone)", 1);
-
-        NutchIndexAction action = new NutchIndexAction(null,
-            NutchIndexAction.DELETE);
-        output.collect(key, action);
+        output.collect(key, DELETE_ACTION);
         return;
       }
 
@@ -237,10 +236,7 @@ public class IndexerMapReduce extends Co
           || dbDatum.getStatus() == CrawlDatum.STATUS_DB_REDIR_PERM
           || dbDatum.getStatus() == CrawlDatum.STATUS_DB_REDIR_TEMP) {
         reporter.incrCounter("IndexerStatus", "deleted redirects", 1);
-
-        NutchIndexAction action = new NutchIndexAction(null,
-            NutchIndexAction.DELETE);
-        output.collect(key, action);
+        output.collect(key, DELETE_ACTION);
         return;
       }
     }
@@ -253,9 +249,7 @@ public class IndexerMapReduce extends Co
     // Whether to delete pages marked as duplicates
     if (delete && dbDatum.getStatus() == CrawlDatum.STATUS_DB_DUPLICATE) {
       reporter.incrCounter("IndexerStatus", "deleted duplicates", 1);
-      NutchIndexAction action = new NutchIndexAction(null,
-          NutchIndexAction.DELETE);
-      output.collect(key, action);
+      output.collect(key, DELETE_ACTION);
       return;
     }
 


Reply via email to