Author: markus
Date: Tue Aug 16 11:59:01 2011
New Revision: 1158215

URL: http://svn.apache.org/viewvc?rev=1158215&view=rev
Log:
NUTCH-1004 Do not index empty values for title field

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1158215&r1=1158214&r2=1158215&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Tue Aug 16 11:59:01 2011
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 2.0 - Current Development
 
+* NUTCH-1004 Do not index empty values for title field (markus)
+
 * NUTCH-914 Implement Apache Project Branding Requirements (lewismc via jnioche)
 
 * NUTCH-1065 New mvn.template (lewismc)

Modified: nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java?rev=1158215&r1=1158214&r2=1158215&view=diff
==============================================================================
--- nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java (original)
+++ nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java Tue Aug 16 11:59:01 2011
@@ -95,8 +95,10 @@ public class BasicIndexingFilter impleme
     if (title.length() > MAX_TITLE_LENGTH) { // truncate title if needed
       title = title.substring(0, MAX_TITLE_LENGTH);
     }
-    // add title indexed and stored so that it can be displayed
-    doc.add("title", title);
+    if (title.length() > 0) {
+      // NUTCH-1004 Do not index empty values for title field
+      doc.add("title", title);
+    }
     // add cached content/summary display policy, if available
     ByteBuffer cachingRaw = page
         .getFromMetadata(Nutch.CACHING_FORBIDDEN_KEY_UTF8);


Reply via email to