Author: markus
Date: Tue Aug 16 11:58:12 2011
New Revision: 1158214
URL: http://svn.apache.org/viewvc?rev=1158214&view=rev
Log:
NUTCH-1004 Do not index empty values for title field
Modified:
nutch/branches/branch-1.4/CHANGES.txt
nutch/branches/branch-1.4/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java
Modified: nutch/branches/branch-1.4/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/CHANGES.txt?rev=1158214&r1=1158213&r2=1158214&view=diff
==============================================================================
--- nutch/branches/branch-1.4/CHANGES.txt (original)
+++ nutch/branches/branch-1.4/CHANGES.txt Tue Aug 16 11:58:12 2011
@@ -2,6 +2,8 @@ Nutch Change Log
Release 1.4 - Current development
+* NUTCH-1004 Do not index empty values for title field (markus)
+
* NUTCH-914 Implement Apache Project Branding Requirements (lewismc via jnioche)
* NUTCH-1069 Readlinkdb broken on Hadoop > 0.20 (markus)
Modified:
nutch/branches/branch-1.4/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java
URL:
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java?rev=1158214&r1=1158213&r2=1158214&view=diff
==============================================================================
--- nutch/branches/branch-1.4/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java (original)
+++ nutch/branches/branch-1.4/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java Tue Aug 16 11:58:12 2011
@@ -77,7 +77,11 @@ public class BasicIndexingFilter impleme
if (title.length() > MAX_TITLE_LENGTH) { // truncate title if needed
title = title.substring(0, MAX_TITLE_LENGTH);
}
- doc.add("title", title);
+
+ if (title.length() > 0) {
+ // NUTCH-1004 Do not index empty values for title field
+ doc.add("title", title);
+ }
// add cached content/summary display policy, if available
String caching = parse.getData().getMeta(Nutch.CACHING_FORBIDDEN_KEY);