Author: jerome
Date: Thu Apr 6 03:49:40 2006
New Revision: 391958
URL: http://svn.apache.org/viewcvs?rev=391958&view=rev
Log:
NUTCH-244, db.max.outlinks.per.page can now be negative for no limit of handled
outlinks per page
Modified:
lucene/nutch/trunk/conf/nutch-default.xml
lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java
lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java
lucene/nutch/trunk/src/test/org/apache/nutch/util/WritableTestUtils.java
Modified: lucene/nutch/trunk/conf/nutch-default.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/conf/nutch-default.xml?rev=391958&r1=391957&r2=391958&view=diff
==
--- lucene/nutch/trunk/conf/nutch-default.xml (original)
+++ lucene/nutch/trunk/conf/nutch-default.xml Thu Apr 6 03:49:40 2006
@@ -255,6 +255,8 @@
<name>db.max.outlinks.per.page</name>
<value>100</value>
<description>The maximum number of outlinks that we'll process for a page.
+ If this value is nonnegative (>=0), at most db.max.outlinks.per.page outlinks
+ will be processed for a page; otherwise, all outlinks will be processed.
</description>
</property>
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java?rev=391958&r1=391957&r2=391958&view=diff
==
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java Thu Apr
6 03:49:40 2006
@@ -119,12 +119,15 @@
int totalOutlinks = in.readInt(); // read outlinks
int maxOutlinksPerPage = this.conf.getInt("db.max.outlinks.per.page", 100);
-int outlinksToRead = Math.min(maxOutlinksPerPage, totalOutlinks);
+int outlinksToRead = totalOutlinks;
+if (maxOutlinksPerPage >= 0) {
+ outlinksToRead = Math.min(maxOutlinksPerPage, totalOutlinks);
+}
outlinks = new Outlink[outlinksToRead];
for (int i = 0; i < outlinksToRead; i++) {
outlinks[i] = Outlink.read(in);
}
-for (int i = maxOutlinksPerPage; i < totalOutlinks; i++) {
+for (int i = outlinksToRead; i < totalOutlinks; i++) {
Outlink.skip(in);
}
Modified: lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java?rev=391958&r1=391957&r2=391958&view=diff
==
--- lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java
(original)
+++ lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java Thu
Apr 6 03:49:40 2006
@@ -51,4 +51,31 @@
WritableTestUtils.testWritable(r, conf);
}
+ public void testMaxOutlinks() throws Exception {
+Outlink[] outlinks = new Outlink[128];
+for (int i=0; i<outlinks.length; i++) {
+ outlinks[i] = new Outlink("http://outlink.com/" + i, "Outlink" + i,
conf);
+}
+ParseData original = new ParseData(ParseStatus.STATUS_SUCCESS,
+ "Max Outlinks Title",
+ outlinks,
+ new Metadata());
+Configuration conf = NutchConfiguration.create();
+// No Outlinks
+conf.setInt("db.max.outlinks.per.page", 0);
+ParseData data = (ParseData) WritableTestUtils.writeRead(original, conf);
+assertEquals(0, data.getOutlinks().length);
+// Only 100 Outlinks
+conf.setInt("db.max.outlinks.per.page", 100);
+data = (ParseData) WritableTestUtils.writeRead(original, conf);
+assertEquals(100, data.getOutlinks().length);
+// 256 Outlinks
+conf.setInt("db.max.outlinks.per.page", 256);
+data = (ParseData) WritableTestUtils.writeRead(original, conf);
+assertEquals(outlinks.length, data.getOutlinks().length);
+// All Outlinks
+conf.setInt("db.max.outlinks.per.page", -1);
+data = (ParseData) WritableTestUtils.writeRead(original, conf);
+assertEquals(outlinks.length, data.getOutlinks().length);
+ }
}
Modified:
lucene/nutch/trunk/src/test/org/apache/nutch/util/WritableTestUtils.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/test/org/apache/nutch/util/WritableTestUtils.java?rev=391958&r1=391957&r2=391958&view=diff
==
--- lucene/nutch/trunk/src/test/org/apache/nutch/util/WritableTestUtils.java
(original)
+++ lucene/nutch/trunk/src/test/org/apache/nutch/util/WritableTestUtils.java
Thu Apr 6 03:49:40 2006
@@ -31,6 +31,14 @@
/** Utility method for testing writables. */
public static void testWritable(Writable before, Configuration conf)
throws Exception {
+TestCase.assertEquals(before, writeRead(before, conf));
+