Author: ab Date: Thu Dec 7 03:21:08 2006 New Revision: 483420 URL: http://svn.apache.org/viewvc?view=rev&rev=483420 Log: Upgrade to Hadoop 0.9.1.
Added: lucene/nutch/trunk/lib/hadoop-0.9.1.jar (with props) Removed: lucene/nutch/trunk/lib/hadoop-0.7.1.jar Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestMapWritable.java Added: lucene/nutch/trunk/lib/hadoop-0.9.1.jar URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/hadoop-0.9.1.jar?view=auto&rev=483420 ============================================================================== Binary file - no diff available. Propchange: lucene/nutch/trunk/lib/hadoop-0.9.1.jar ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java?view=diff&rev=483420&r1=483419&r2=483420 ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java Thu Dec 7 03:21:08 2006 @@ -115,11 +115,13 @@ FileSystem fs = new JobClient(job).getFs(); Path old = new Path(crawlDb, "old"); Path current = new Path(crawlDb, CrawlDatum.DB_DIR_NAME); - fs.delete(old); - fs.rename(current, old); + if (fs.exists(current)) { + if (fs.exists(old)) fs.delete(old); + fs.rename(current, old); + } fs.mkdirs(crawlDb); fs.rename(newCrawlDb, current); - fs.delete(old); + if (fs.exists(old)) fs.delete(old); } public static void main(String[] args) throws Exception { Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java?view=diff&rev=483420&r1=483419&r2=483420 
============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java Thu Dec 7 03:21:08 2006 @@ -22,6 +22,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.io.*; +import org.apache.hadoop.io.SequenceFile.CompressionType; import org.apache.nutch.crawl.CrawlDatum; import org.apache.nutch.fetcher.Fetcher; import org.apache.hadoop.fs.*; @@ -68,13 +69,13 @@ new Path(new Path(job.getOutputPath(), CrawlDatum.PARSE_DIR_NAME), name); final MapFile.Writer textOut = - new MapFile.Writer(fs, text.toString(), Text.class, ParseText.class); + new MapFile.Writer(job, fs, text.toString(), Text.class, ParseText.class, CompressionType.RECORD); final MapFile.Writer dataOut = - new MapFile.Writer(fs, data.toString(), Text.class,ParseData.class,true); + new MapFile.Writer(job, fs, data.toString(), Text.class,ParseData.class); final SequenceFile.Writer crawlOut = - new SequenceFile.Writer(fs, crawl, Text.class, CrawlDatum.class); + SequenceFile.createWriter(fs, job, crawl, Text.class, CrawlDatum.class); return new RecordWriter() { Modified: lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestMapWritable.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestMapWritable.java?view=diff&rev=483420&r1=483419&r2=483420 ============================================================================== --- lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestMapWritable.java (original) +++ lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestMapWritable.java Thu Dec 7 03:21:08 2006 @@ -106,8 +106,8 @@ FileSystem fs = FileSystem.get(configuration); Path file = new Path(System.getProperty("java.io.tmpdir"), "mapTestFile"); fs.delete(file); - org.apache.hadoop.io.SequenceFile.Writer writer = new SequenceFile.Writer( - fs, file, IntWritable.class, 
MapWritable.class); + org.apache.hadoop.io.SequenceFile.Writer writer = SequenceFile.createWriter( + fs, configuration, file, IntWritable.class, MapWritable.class); // write map System.out.println("start writing map's"); long start = System.currentTimeMillis(); @@ -139,8 +139,8 @@ fs.delete(file); // Text - System.out.println("start writing utf8's"); - writer = new SequenceFile.Writer(fs, file, IntWritable.class, Text.class); + System.out.println("start writing Text's"); + writer = SequenceFile.createWriter(fs, configuration, file, IntWritable.class, Text.class); // write map start = System.currentTimeMillis(); key = new IntWritable(); @@ -153,17 +153,17 @@ } needed = System.currentTimeMillis() - start; writer.close(); - System.out.println("needed time for writing utf8's: " + needed); + System.out.println("needed time for writing Text's: " + needed); // read map - System.out.println("start reading utf8's"); + System.out.println("start reading Text's"); reader = new SequenceFile.Reader(fs, file, configuration); start = System.currentTimeMillis(); while (reader.next(key, value)) { } needed = System.currentTimeMillis() - start; - System.out.println("needed time for reading utf8: " + needed); + System.out.println("needed time for reading Text: " + needed); fs.delete(file); }