Author: fenglu
Date: Thu Mar 28 13:04:28 2013
New Revision: 1462078
URL: http://svn.apache.org/r1462078
Log:
NUTCH-1547 BasicIndexingFilter - Problem to index full title
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/conf/nutch-default.xml
nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1462078&r1=1462077&r2=1462078&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Thu Mar 28 13:04:28 2013
@@ -2,6 +2,8 @@ Nutch Change Log
(trunk): Current Development
+* NUTCH-1547 BasicIndexingFilter - Problem to index full title (Feng)
+
* NUTCH-1389 parsechecker and indexchecker to report truncated content (snagel)
* NUTCH-1419 parsechecker and indexchecker to report protocol status (snagel +
lewismc)
Modified: nutch/trunk/conf/nutch-default.xml
URL:
http://svn.apache.org/viewvc/nutch/trunk/conf/nutch-default.xml?rev=1462078&r1=1462077&r2=1462078&view=diff
==============================================================================
--- nutch/trunk/conf/nutch-default.xml (original)
+++ nutch/trunk/conf/nutch-default.xml Thu Mar 28 13:04:28 2013
@@ -897,7 +897,7 @@
<property>
<name>indexer.max.title.length</name>
<value>100</value>
- <description>The maximum number of characters of a title that are indexed.
+ <description>The maximum number of characters of a title that are indexed. A
value of -1 disables this check.
</description>
</property>
Modified:
nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java?rev=1462078&r1=1462077&r2=1462078&view=diff
==============================================================================
--- nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java (original)
+++ nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java Thu Mar 28 13:04:28 2013
@@ -108,7 +108,7 @@ public class BasicIndexingFilter impleme
// title
String title = parse.getData().getTitle();
- if (title.length() > MAX_TITLE_LENGTH) { // truncate title if needed
+ if (MAX_TITLE_LENGTH > -1 && title.length() > MAX_TITLE_LENGTH) { // truncate title if needed
title = title.substring(0, MAX_TITLE_LENGTH);
}