Author: markus
Date: Tue Aug 16 11:59:01 2011
New Revision: 1158215
URL: http://svn.apache.org/viewvc?rev=1158215&view=rev
Log:
NUTCH-1004 Do not index empty values for title field
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1158215&r1=1158214&r2=1158215&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Tue Aug 16 11:59:01 2011
@@ -2,6 +2,8 @@ Nutch Change Log
Release 2.0 - Current Development
+* NUTCH-1004 Do not index empty values for title field (markus)
+
* NUTCH-914 Implement Apache Project Branding Requirements (lewismc via
jnioche)
* NUTCH-1065 New mvn.template (lewismc)
Modified:
nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java?rev=1158215&r1=1158214&r2=1158215&view=diff
==============================================================================
--- nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java (original)
+++ nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java Tue Aug 16 11:59:01 2011
@@ -95,8 +95,10 @@ public class BasicIndexingFilter impleme
if (title.length() > MAX_TITLE_LENGTH) { // truncate title if needed
title = title.substring(0, MAX_TITLE_LENGTH);
}
- // add title indexed and stored so that it can be displayed
- doc.add("title", title);
+ if (title.length() > 0) {
+ // NUTCH-1004 Do not index empty values for title field
+ doc.add("title", title);
+ }
// add cached content/summary display policy, if available
ByteBuffer cachingRaw = page
.getFromMetadata(Nutch.CACHING_FORBIDDEN_KEY_UTF8);