Author: fenglu
Date: Mon Jul 1 13:34:23 2013
New Revision: 1498437
URL: http://svn.apache.org/r1498437
Log:
NUTCH-1594 count variable is never changed in ParseUtil class
Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/src/java/org/apache/nutch/parse/ParseUtil.java
Modified: nutch/branches/2.x/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1498437&r1=1498436&r2=1498437&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Mon Jul 1 13:34:23 2013
@@ -2,7 +2,7 @@ Nutch Change Log
Current Development
-NUTCH-
+* NUTCH-1594 count variable is never changed in ParseUtil class (Canan via Feng)
Release 2.2.1 - 06/27/2013 (mm/dd/yyyy)
Release Report - http://s.apache.org/PGa
Modified: nutch/branches/2.x/src/java/org/apache/nutch/parse/ParseUtil.java
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/parse/ParseUtil.java?rev=1498437&r1=1498436&r2=1498437&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/parse/ParseUtil.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/parse/ParseUtil.java Mon Jul 1 13:34:23 2013
@@ -239,7 +239,7 @@ public class ParseUtil extends Configure
page.getOutlinks().clear();
}
final Outlink[] outlinks = parse.getOutlinks();
- final int count = 0;
+ int outlinksToStore = Math.min(maxOutlinks, outlinks.length);
String fromHost;
if (ignoreExternalLinks) {
try {
@@ -250,7 +250,9 @@ public class ParseUtil extends Configure
} else {
fromHost = null;
}
- for (int i = 0; count < maxOutlinks && i < outlinks.length; i++) {
+ int validCount = 0;
+
+ for (int i = 0; validCount < outlinksToStore && i < outlinks.length; i++) {
String toUrl = outlinks[i].getToUrl();
try {
toUrl = normalizers.normalize(toUrl, URLNormalizers.SCOPE_OUTLINK);
@@ -279,7 +281,7 @@ public class ParseUtil extends Configure
continue; // skip it
}
}
-
+ validCount++;
page.putToOutlinks(utf8ToUrl, new Utf8(outlinks[i].getAnchor()));
}
Utf8 fetchMark = Mark.FETCH_MARK.checkMark(page);