Author: jnioche
Date: Mon Oct 7 09:23:47 2013
New Revision: 1529802
URL: http://svn.apache.org/r1529802
Log:
NUTCH-1640 Reuse ParseUtil instance
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1529802&r1=1529801&r2=1529802&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Mon Oct 7 09:23:47 2013
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Development Trunk
+* NUTCH-1640 Reuse ParseUtil instance in ParseSegment (Mitesh Singh Jat via jnioche)
+
* NUTCH-1639 bin/crawl fails on mac os (various contributors via snagel)
* NUTCH-1646 IndexerMapReduce to consider DB status (markus)
Modified: nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java?rev=1529802&r1=1529801&r2=1529802&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java Mon Oct 7 09:23:47 2013
@@ -51,6 +51,8 @@ public class ParseSegment extends Config
private ScoringFilters scfilters;
+ private ParseUtil parseUtil;
+
private boolean skipTruncated;
public ParseSegment() {
@@ -94,7 +96,9 @@ public class ParseSegment extends Config
ParseResult parseResult = null;
try {
- parseResult = new ParseUtil(getConf()).parse(content);
+ if (parseUtil == null) // first call only: build the ParseUtil once, reuse it afterwards
+ parseUtil = new ParseUtil(getConf());
+ parseResult = parseUtil.parse(content);
} catch (Exception e) {
LOG.warn("Error parsing: " + key + ": " +
StringUtils.stringifyException(e));
return;