Author: cutting
Date: Fri Aug 26 09:39:11 2005
New Revision: 240280

URL: http://svn.apache.org/viewcvs?rev=240280&view=rev
Log:
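Limit to 10,000 inlinks by default (previously 100,000).  Also optimize a common case: when the first Inlinks value for a key is already under the limit, reuse it as the result instead of copying its entries.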

Modified:
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java?rev=240280&r1=240279&r2=240280&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java Fri Aug 26 09:39:11 2005
@@ -49,7 +49,7 @@
 
   public void configure(JobConf job) {
     maxAnchorLength = job.getInt("db.max.anchor.length", 100);
-    maxInlinks = job.getInt("db.max.inlinks", 100000);
+    maxInlinks = job.getInt("db.max.inlinks", 10000);
   }
 
   public void map(WritableComparable key, Writable value,
@@ -74,9 +74,21 @@
   public void reduce(WritableComparable key, Iterator values,
                      OutputCollector output, Reporter reporter)
     throws IOException {
-    Inlinks result = new Inlinks();
+
+    Inlinks result = null;
+
     while (values.hasNext()) {
       Inlinks inlinks = (Inlinks)values.next();
+
+      if (result == null) {                       // optimize a common case
+        if (inlinks.size() < maxInlinks) {
+          result = inlinks;
+          continue;
+        } else {
+          result = new Inlinks();
+        }
+      }
+
       int end = Math.min(maxInlinks - result.size(), inlinks.size());
       for (int i = 0; i < end; i++) {
         result.add(inlinks.get(i));


Reply via email to