Author: fenglu
Date: Thu Mar 28 13:09:09 2013
New Revision: 1462079
URL: http://svn.apache.org/r1462079
Log:
NUTCH-1547 BasicIndexingFilter - Problem to index full title
Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/conf/nutch-default.xml
nutch/branches/2.x/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java
Modified: nutch/branches/2.x/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1462079&r1=1462078&r2=1462079&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Thu Mar 28 13:09:09 2013
@@ -2,6 +2,8 @@ Nutch Change Log
Release 2.2 - Current Development
+* NUTCH-1547 BasicIndexingFilter - Problem to index full title (Feng)
+
* NUTCH-1389 parsechecker and indexchecker to report truncated content (snagel)
* NUTCH-1419 parsechecker and indexchecker to report protocol status (snagel
via lewismc)
Modified: nutch/branches/2.x/conf/nutch-default.xml
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/conf/nutch-default.xml?rev=1462079&r1=1462078&r2=1462079&view=diff
==============================================================================
--- nutch/branches/2.x/conf/nutch-default.xml (original)
+++ nutch/branches/2.x/conf/nutch-default.xml Thu Mar 28 13:09:09 2013
@@ -752,7 +752,7 @@
<property>
<name>indexer.max.title.length</name>
<value>100</value>
- <description>The maximum number of characters of a title that are indexed.
+ <description>The maximum number of characters of a title that are indexed. A value of -1 disables this check.
Used by index-basic.
</description>
</property>
Modified:
nutch/branches/2.x/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java?rev=1462079&r1=1462078&r2=1462079&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java (original)
+++ nutch/branches/2.x/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java Thu Mar 28 13:09:09 2013
@@ -109,7 +109,7 @@ public class BasicIndexingFilter impleme
// title
String title = TableUtil.toString(page.getTitle());
- if (title.length() > MAX_TITLE_LENGTH) { // truncate title if needed
+ if (MAX_TITLE_LENGTH > -1 && title.length() > MAX_TITLE_LENGTH) { // truncate title if needed
title = title.substring(0, MAX_TITLE_LENGTH);
}
if (title.length() > 0) {