Author: markus
Date: Tue Jun 12 10:22:00 2012
New Revision: 1349233

URL: http://svn.apache.org/viewvc?rev=1349233&view=rev
Log:
NUTCH-1386 Headings filter not to add empty values

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/plugin/headings/src/java/org/apache/nutch/parse/headings/HeadingsParseFilter.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1349233&r1=1349232&r2=1349233&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Tue Jun 12 10:22:00 2012
@@ -2,6 +2,8 @@ Nutch Change Log
 
 (trunk) Current Development:
 
+* NUTCH-1386 Headings filter not to add empty values (markus)
+
 * NUTCH-1356 ParseUtil use ExecutorService instead of manually thread handling (ferdy via markus)
 
 * NUTCH-1352 Improve regex urlfilters/normalizers synchronization (ferdy via markus)

Modified: nutch/trunk/src/plugin/headings/src/java/org/apache/nutch/parse/headings/HeadingsParseFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/headings/src/java/org/apache/nutch/parse/headings/HeadingsParseFilter.java?rev=1349233&r1=1349232&r2=1349233&view=diff
==============================================================================
--- nutch/trunk/src/plugin/headings/src/java/org/apache/nutch/parse/headings/HeadingsParseFilter.java (original)
+++ nutch/trunk/src/plugin/headings/src/java/org/apache/nutch/parse/headings/HeadingsParseFilter.java Tue Jun 12 10:22:00 2012
@@ -48,7 +48,11 @@ public class HeadingsParseFilter impleme
       heading = getElement(headings[i]);
 
       if (heading != null) {
-        parse.getData().getParseMeta().set(headings[i], heading.trim());
+        heading.trim();
+
+        if (heading.length() > 0) {
+          parse.getData().getParseMeta().set(headings[i], heading);
+        }
       }
     }
 
@@ -89,13 +93,13 @@ public class HeadingsParseFilter impleme
    * Returns the text value of the specified Node and child nodes
    */
   protected static String getNodeValue(Node node) {
-    StringBuffer buffer = new StringBuffer();
+    StringBuilder buffer = new StringBuilder();
 
     NodeList children = node.getChildNodes();
 
     for (int i = 0; i < children.getLength(); i++) {
       if (children.item(i).getNodeType() == Node.TEXT_NODE) {
-          buffer.append(children.item(i).getNodeValue());
+        buffer.append(children.item(i).getNodeValue());
       }
     }
 


Reply via email to