Author: ab
Date: Thu Feb 23 09:21:38 2006
New Revision: 380163
URL: http://svn.apache.org/viewcvs?rev=380163&view=rev
Log:
Modify the command line so that it's possible to perform incremental
updates on an existing linkDb. This significantly speeds up the
"invertlinks" operation.
Modified:
lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java?rev=380163&r1=380162&r2=380163&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java Thu Feb 23 09:21:38 2006
@@ -190,11 +190,22 @@
LinkDb linkDb = new LinkDb(NutchConfiguration.create());
if (args.length < 2) {
- System.err.println("Usage: <linkdb> <segments>");
+ System.err.println("Usage: <linkdb> (-dir segmentsDir | segment1 segment2 ...)");
return;
}
-
- linkDb.invert(new File(args[0]), new File(args[1]));
+ boolean dir = false;
+ File segDir = null;
+ File db = new File(args[0]);
+ ArrayList segs = new ArrayList();
+ for (int i = 1; i < args.length; i++) {
+ if (args[i].equals("-dir")) {
+ dir = true;
+ segDir = new File(args[++i]);
+ break;
+ } else segs.add(new File(args[i]));
+ }
+ if (dir) linkDb.invert(db, segDir);
+ else linkDb.invert(db, (File[])segs.toArray(new File[segs.size()]));
}