Author: ab Date: Thu Feb 23 09:21:38 2006 New Revision: 380163 URL: http://svn.apache.org/viewcvs?rev=380163&view=rev Log: Modify the cmd-line so that it's possible to perform incremental updates on an existing linkDb. This significantly speeds up the "invertlinks" operation.
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java?rev=380163&r1=380162&r2=380163&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java Thu Feb 23 09:21:38 2006 @@ -190,11 +190,22 @@ LinkDb linkDb = new LinkDb(NutchConfiguration.create()); if (args.length < 2) { - System.err.println("Usage: <linkdb> <segments>"); + System.err.println("Usage: <linkdb> (-dir segmentsDir | segment1 segment2 ...)"); return; } - - linkDb.invert(new File(args[0]), new File(args[1])); + boolean dir = false; + File segDir = null; + File db = new File(args[0]); + ArrayList segs = new ArrayList(); + for (int i = 1; i < args.length; i++) { + if (args[i].equals("-dir")) { + dir = true; + segDir = new File(args[++i]); + break; + } else segs.add(new File(args[i])); + } + if (dir) linkDb.invert(db, segDir); + else linkDb.invert(db, (File[])segs.toArray(new File[segs.size()])); }