Author: cutting
Date: Fri Aug 26 09:39:11 2005
New Revision: 240280
URL: http://svn.apache.org/viewcvs?rev=240280&view=rev
Log:
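Limit to 10,000 inlinks by default (previously 100,000). Also optimize a common case in reduce(): when the first Inlinks value for a key holds fewer than the maximum number of inlinks, reuse it as the result instead of copying its entries into a new Inlinks.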
Modified:
lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java
Modified:
lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java?rev=240280&r1=240279&r2=240280&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java Fri Aug 26 09:39:11 2005
@@ -49,7 +49,7 @@
public void configure(JobConf job) {
maxAnchorLength = job.getInt("db.max.anchor.length", 100);
- maxInlinks = job.getInt("db.max.inlinks", 100000);
+ maxInlinks = job.getInt("db.max.inlinks", 10000);
}
public void map(WritableComparable key, Writable value,
@@ -74,9 +74,21 @@
public void reduce(WritableComparable key, Iterator values,
OutputCollector output, Reporter reporter)
throws IOException {
- Inlinks result = new Inlinks();
+
+ Inlinks result = null;
+
while (values.hasNext()) {
Inlinks inlinks = (Inlinks)values.next();
+
+ if (result == null) { // optimize a common case
+ if (inlinks.size() < maxInlinks) {
+ result = inlinks;
+ continue;
+ } else {
+ result = new Inlinks();
+ }
+ }
+
int end = Math.min(maxInlinks - result.size(), inlinks.size());
for (int i = 0; i < end; i++) {
result.add(inlinks.get(i));