Update of /cvsroot/nutch/nutch/src/java/net/nutch/tools In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv20100/src/java/net/nutch/tools
Modified Files: CrawlTool.java SegmentMergeTool.java Log Message: Modify the NutchFileSystem so callers can more transparently modify files that could be local or remote. In the local case, they are modified directly in place. In the remote case, they are copied to a local path, then placed back on the remote fs. This was the effect of the previous code, but it could sometimes result in unnecessary copying. I think this API is clearer, and it has no inefficiencies for the localfs case. (And is as fast as possible for remotefs, given that we have no NDFS API for Lucene-formatted files.) Index: CrawlTool.java =================================================================== RCS file: /cvsroot/nutch/nutch/src/java/net/nutch/tools/CrawlTool.java,v retrieving revision 1.10 retrieving revision 1.11 diff -C2 -d -r1.10 -r1.11 *** CrawlTool.java 8 Sep 2004 16:29:13 -0000 1.10 --- CrawlTool.java 4 Oct 2004 15:42:58 -0000 1.11 *************** *** 82,86 **** try { String rootUrlFile = null; ! String dir = "crawl-" + getDate(); int threads = NutchConf.getInt("fetcher.threads.fetch", 10); int depth = 5; --- 82,86 ---- try { String rootUrlFile = null; ! String dir = new File("crawl-" + getDate()).getCanonicalFile().getName(); int threads = NutchConf.getInt("fetcher.threads.fetch", 10); int depth = 5; Index: SegmentMergeTool.java =================================================================== RCS file: /cvsroot/nutch/nutch/src/java/net/nutch/tools/SegmentMergeTool.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** SegmentMergeTool.java 23 Aug 2004 04:13:27 -0000 1.4 --- SegmentMergeTool.java 4 Oct 2004 15:42:58 -0000 1.5 *************** *** 173,181 **** // Index this segment LOG.info(" - creating missing index for " + segdirs[i].getName()); ! IndexSegment.main(new String[]{segdirs[i].toString(), "/tmp"}); } } LOG.info(" - deleting duplicates from indexes in " + segments); ! 
DeleteDuplicates.main(new String[]{segments, new File(new File(segments).getParentFile(), "tmp").toString()}); LOG.info(" - creating merged index in " + masterDir); String[] args = new String[segdirs.length + 1]; --- 173,181 ---- // Index this segment LOG.info(" - creating missing index for " + segdirs[i].getName()); ! IndexSegment.main(new String[]{segdirs[i].toString()}); } } LOG.info(" - deleting duplicates from indexes in " + segments); ! DeleteDuplicates.main(new String[]{segments, new File(segments).getParentFile().toString()}); LOG.info(" - creating merged index in " + masterDir); String[] args = new String[segdirs.length + 1]; *************** *** 255,259 **** if (runIndexer) { LOG.info("Creating new segment index..."); ! IndexSegment.main(new String[]{directory.toString(), "/tmp"}); } if (delSegs) { --- 255,259 ---- if (runIndexer) { LOG.info("Creating new segment index..."); ! IndexSegment.main(new String[]{directory.toString()}); } if (delSegs) { ------------------------------------------------------- This SF.net email is sponsored by: IT Product Guide on ITManagersJournal Use IT products in your business? Tell us what you think of them. Give us Your Opinions, Get Free ThinkGeek Gift Certificates! Click to find out more http://productguide.itmanagersjournal.com/guidepromo.tmpl _______________________________________________ Nutch-cvs mailing list [EMAIL PROTECTED] https://lists.sourceforge.net/lists/listinfo/nutch-cvs