Author: markus
Date: Tue Jun 12 10:22:00 2012
New Revision: 1349233

URL: http://svn.apache.org/viewvc?rev=1349233&view=rev
Log:
NUTCH-1386 Headings filter not to add empty values

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/plugin/headings/src/java/org/apache/nutch/parse/headings/HeadingsParseFilter.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1349233&r1=1349232&r2=1349233&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Tue Jun 12 10:22:00 2012
@@ -2,6 +2,8 @@ Nutch Change Log

 (trunk) Current Development:

+* NUTCH-1386 Headings filter not to add empty values (markus)
+
 * NUTCH-1356 ParseUtil use ExecutorService instead of manually thread handling (ferdy via markus)

 * NUTCH-1352 Improve regex urlfilters/normalizers synchronization (ferdy via markus)

Modified: nutch/trunk/src/plugin/headings/src/java/org/apache/nutch/parse/headings/HeadingsParseFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/headings/src/java/org/apache/nutch/parse/headings/HeadingsParseFilter.java?rev=1349233&r1=1349232&r2=1349233&view=diff
==============================================================================
--- nutch/trunk/src/plugin/headings/src/java/org/apache/nutch/parse/headings/HeadingsParseFilter.java (original)
+++ nutch/trunk/src/plugin/headings/src/java/org/apache/nutch/parse/headings/HeadingsParseFilter.java Tue Jun 12 10:22:00 2012
@@ -48,7 +48,11 @@ public class HeadingsParseFilter impleme
       heading = getElement(headings[i]);

       if (heading != null) {
-        parse.getData().getParseMeta().set(headings[i], heading.trim());
+        heading.trim();
+
+        if (heading.length() > 0) {
+          parse.getData().getParseMeta().set(headings[i], heading);
+        }
       }
     }

@@ -89,13 +93,13 @@ public class HeadingsParseFilter impleme
    * Returns the text value of the specified Node and child nodes
    */
   protected static String getNodeValue(Node node) {
-    StringBuffer buffer = new StringBuffer();
+    StringBuilder buffer = new StringBuilder();

     NodeList children = node.getChildNodes();

     for (int i = 0; i < children.getLength(); i++) {
       if (children.item(i).getNodeType() == Node.TEXT_NODE) {
-        buffer.append(children.item(i).getNodeValue());
+        buffer.append(children.item(i).getNodeValue());
       }
     }