Author: markus
Date: Tue Jun 12 10:22:00 2012
New Revision: 1349233
URL: http://svn.apache.org/viewvc?rev=1349233&view=rev
Log:
NUTCH-1386 Headings filter not to add empty values
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/plugin/headings/src/java/org/apache/nutch/parse/headings/HeadingsParseFilter.java
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1349233&r1=1349232&r2=1349233&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Tue Jun 12 10:22:00 2012
@@ -2,6 +2,8 @@ Nutch Change Log
(trunk) Current Development:
+* NUTCH-1386 Headings filter not to add empty values (markus)
+
* NUTCH-1356 ParseUtil use ExecutorService instead of manually thread handling
(ferdy via markus)
* NUTCH-1352 Improve regex urlfilters/normalizers synchronization (ferdy via
markus)
Modified:
nutch/trunk/src/plugin/headings/src/java/org/apache/nutch/parse/headings/HeadingsParseFilter.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/headings/src/java/org/apache/nutch/parse/headings/HeadingsParseFilter.java?rev=1349233&r1=1349232&r2=1349233&view=diff
==============================================================================
---
nutch/trunk/src/plugin/headings/src/java/org/apache/nutch/parse/headings/HeadingsParseFilter.java
(original)
+++
nutch/trunk/src/plugin/headings/src/java/org/apache/nutch/parse/headings/HeadingsParseFilter.java
Tue Jun 12 10:22:00 2012
@@ -48,7 +48,11 @@ public class HeadingsParseFilter impleme
heading = getElement(headings[i]);
if (heading != null) {
- parse.getData().getParseMeta().set(headings[i], heading.trim());
+ heading.trim();
+
+ if (heading.length() > 0) {
+ parse.getData().getParseMeta().set(headings[i], heading);
+ }
}
}
@@ -89,13 +93,13 @@ public class HeadingsParseFilter impleme
* Returns the text value of the specified Node and child nodes
*/
protected static String getNodeValue(Node node) {
- StringBuffer buffer = new StringBuffer();
+ StringBuilder buffer = new StringBuilder();
NodeList children = node.getChildNodes();
for (int i = 0; i < children.getLength(); i++) {
if (children.item(i).getNodeType() == Node.TEXT_NODE) {
- buffer.append(children.item(i).getNodeValue());
+ buffer.append(children.item(i).getNodeValue());
}
}