Author: markus
Date: Tue Aug 16 11:59:01 2011
New Revision: 1158215

URL: http://svn.apache.org/viewvc?rev=1158215&view=rev
Log:
NUTCH-1004 Do not index empty values for title field

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1158215&r1=1158214&r2=1158215&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Tue Aug 16 11:59:01 2011
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 2.0 - Current Development
 
+* NUTCH-1004 Do not index empty values for title field (markus)
+
 * NUTCH-914 Implement Apache Project Branding Requirements (lewismc via jnioche)
 
 * NUTCH-1065 New mvn.template (lewismc)

Modified: nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java?rev=1158215&r1=1158214&r2=1158215&view=diff
==============================================================================
--- nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java (original)
+++ nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java Tue Aug 16 11:59:01 2011
@@ -95,8 +95,10 @@ public class BasicIndexingFilter impleme
     if (title.length() > MAX_TITLE_LENGTH) { // truncate title if needed
       title = title.substring(0, MAX_TITLE_LENGTH);
     }
-    // add title indexed and stored so that it can be displayed
-    doc.add("title", title);
+    if (title.length() > 0) {
+      // NUTCH-1004 Do not index empty values for title field
+      doc.add("title", title);
+    }
     // add cached content/summary display policy, if available
     ByteBuffer cachingRaw = page
         .getFromMetadata(Nutch.CACHING_FORBIDDEN_KEY_UTF8);