Modified: lucene/nutch/trunk/src/java/org/apache/nutch/tools/FreeGenerator.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/tools/FreeGenerator.java?rev=678533&r1=678532&r2=678533&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/tools/FreeGenerator.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/tools/FreeGenerator.java Mon Jul 21 12:20:21 2008 @@ -28,6 +28,8 @@ import org.apache.hadoop.conf.Configured; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapred.FileInputFormat; +import org.apache.hadoop.mapred.FileOutputFormat; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.MapReduceBase; @@ -158,7 +160,7 @@ JobConf job = new NutchJob(getConf()); job.setBoolean(FILTER_KEY, filter); job.setBoolean(NORMALIZE_KEY, normalize); - job.addInputPath(new Path(args[0])); + FileInputFormat.addInputPath(job, new Path(args[0])); job.setInputFormat(TextInputFormat.class); job.setMapperClass(FG.class); job.setMapOutputKeyClass(Text.class); @@ -171,7 +173,8 @@ job.setOutputKeyClass(Text.class); job.setOutputValueClass(CrawlDatum.class); job.setOutputKeyComparatorClass(Generator.HashComparator.class); - job.setOutputPath(new Path(args[1], new Path(segName, CrawlDatum.GENERATE_DIR_NAME))); + FileOutputFormat.setOutputPath(job, new Path(args[1], + new Path(segName, CrawlDatum.GENERATE_DIR_NAME))); try { JobClient.runJob(job); return 0;
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/tools/arc/ArcInputFormat.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/tools/arc/ArcInputFormat.java?rev=678533&r1=678532&r2=678533&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/tools/arc/ArcInputFormat.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/tools/arc/ArcInputFormat.java Mon Jul 21 12:20:21 2008 @@ -18,6 +18,8 @@ import java.io.IOException; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.InputSplit; @@ -29,7 +31,7 @@ * A input format the reads arc files. */ public class ArcInputFormat - extends FileInputFormat { + extends FileInputFormat<Text, BytesWritable> { /** * Returns the <code>RecordReader</code> for reading the arc file. @@ -38,8 +40,8 @@ * @param job The job configuration. * @param reporter The progress reporter. */ - public RecordReader getRecordReader(InputSplit split, JobConf job, - Reporter reporter) + public RecordReader<Text, BytesWritable> getRecordReader(InputSplit split, + JobConf job, Reporter reporter) throws IOException { reporter.setStatus(split.toString()); return new ArcRecordReader(job, (FileSplit)split); Modified: lucene/nutch/trunk/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java?rev=678533&r1=678532&r2=678533&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java Mon Jul 21 12:20:21 2008 @@ -28,8 +28,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.util.ReflectionUtils; @@ -50,7 +48,7 @@ * @see http://www.grub.org/ */ public class ArcRecordReader - implements RecordReader { + implements RecordReader<Text, BytesWritable> { public static final Log LOG = LogFactory.getLog(ArcRecordReader.class); @@ -123,15 +121,15 @@ /** * Creates a new instance of the <code>Text</code> object for the key. */ - public WritableComparable createKey() { - return (WritableComparable)ReflectionUtils.newInstance(Text.class, conf); + public Text createKey() { + return (Text)ReflectionUtils.newInstance(Text.class, conf); } /** * Creates a new instance of the <code>BytesWritable</code> object for the key */ - public Writable createValue() { - return (Writable)ReflectionUtils.newInstance(BytesWritable.class, conf); + public BytesWritable createValue() { + return (BytesWritable)ReflectionUtils.newInstance(BytesWritable.class, conf); } /** @@ -175,7 +173,7 @@ * * @throws IOException If an error occurs while reading the record value. */ - public boolean next(WritableComparable key, Writable value) + public boolean next(Text key, BytesWritable value) throws IOException { try { Modified: lucene/nutch/trunk/src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java?rev=678533&r1=678532&r2=678533&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java Mon Jul 21 12:20:21 2008 @@ -28,6 +28,8 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.FileInputFormat; +import org.apache.hadoop.mapred.FileOutputFormat; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.Mapper; @@ -353,10 +355,10 @@ job.setJobName("ArcSegmentCreator " + arcFiles); String segName = generateSegmentName(); job.set(Nutch.SEGMENT_NAME_KEY, segName); - job.setInputPath(arcFiles); + FileInputFormat.addInputPath(job, arcFiles); job.setInputFormat(ArcInputFormat.class); job.setMapperClass(ArcSegmentCreator.class); - job.setOutputPath(new Path(segmentsOutDir, segName)); + FileOutputFormat.setOutputPath(job, new Path(segmentsOutDir, segName)); job.setOutputFormat(FetcherOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(NutchWritable.class); Modified: lucene/nutch/trunk/src/java/org/apache/nutch/tools/compat/CrawlDbConverter.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/tools/compat/CrawlDbConverter.java?rev=678533&r1=678532&r2=678533&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/tools/compat/CrawlDbConverter.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/tools/compat/CrawlDbConverter.java Mon Jul 21 12:20:21 2008 @@ -23,12 +23,15 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.UTF8; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapred.FileInputFormat; +import org.apache.hadoop.mapred.FileOutputFormat; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.MapFileOutputFormat; @@ -37,7 +40,8 @@ import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapred.SequenceFileInputFormat; import org.apache.hadoop.util.StringUtils; -import org.apache.hadoop.util.ToolBase; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; import org.apache.nutch.crawl.CrawlDatum; import org.apache.nutch.crawl.CrawlDb; import org.apache.nutch.crawl.MapWritable; @@ -52,7 +56,8 @@ * * @author Andrzej Bialecki */ -public class CrawlDbConverter extends ToolBase implements Mapper { +public class CrawlDbConverter extends Configured implements Tool, + Mapper<WritableComparable, CrawlDatum, Text, CrawlDatum> { private static final Log LOG = LogFactory.getLog(CrawlDbConverter.class); private static final String CONVERT_META_KEY = "db.converter.with.metadata"; @@ -66,7 +71,8 @@ newKey = new Text(); } - public void map(WritableComparable key, Writable value, OutputCollector output, + public void map(WritableComparable key, CrawlDatum value, + OutputCollector<Text, CrawlDatum> output, Reporter reporter) throws IOException { newKey.set(key.toString()); if (withMetadata) { @@ -97,7 +103,8 @@ * @param args */ public static void main(String[] args) throws Exception { - int res = new CrawlDbConverter().doMain(NutchConfiguration.create(), args); + int res = ToolRunner.run(NutchConfiguration.create(), new CrawlDbConverter(), args); + System.exit(res); } public int run(String[] args) throws Exception { @@ -123,13 +130,13 @@ withMetadata = true; job.setBoolean(CONVERT_META_KEY, withMetadata); - job.setInputPath(oldDb); + FileInputFormat.addInputPath(job, oldDb); job.setInputFormat(SequenceFileInputFormat.class); job.setMapperClass(CrawlDbConverter.class); job.setOutputFormat(MapFileOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(CrawlDatum.class); - job.setOutputPath(newDb); + FileOutputFormat.setOutputPath(job, newDb); try { JobClient.runJob(job); CrawlDb.install(job, new Path(args[1])); Modified: lucene/nutch/trunk/src/java/org/apache/nutch/util/LockUtil.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/util/LockUtil.java?rev=678533&r1=678532&r2=678533&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/util/LockUtil.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/util/LockUtil.java Mon Jul 21 12:20:21 2008 @@ -65,6 +65,6 @@ if (!fs.exists(lockFile)) return false; if (fs.getFileStatus(lockFile).isDir()) throw new IOException("lock file " + lockFile + " exists but is a directory!"); - return fs.delete(lockFile); + return fs.delete(lockFile, false); } } Modified: lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestGenerator.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestGenerator.java?rev=678533&r1=678532&r2=678533&view=diff ============================================================================== --- lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestGenerator.java (original) +++ lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestGenerator.java Mon Jul 21 12:20:21 2008 @@ -53,7 +53,7 @@ protected void setUp() throws Exception { conf = CrawlDBTestUtil.createConfiguration(); fs = FileSystem.get(conf); - fs.delete(testdir); + fs.delete(testdir, true); } protected void tearDown() { @@ -62,7 +62,7 @@ private void delete(Path p) { try { - fs.delete(p); + fs.delete(p, true); } catch (IOException e) { } } Modified: lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestInjector.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestInjector.java?rev=678533&r1=678532&r2=678533&view=diff ============================================================================== --- lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestInjector.java (original) +++ lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestInjector.java Mon Jul 21 12:20:21 2008 @@ -52,12 +52,12 @@ urlPath=new Path(testdir,"urls"); crawldbPath=new Path(testdir,"crawldb"); fs=FileSystem.get(conf); - if (fs.exists(urlPath)) fs.delete(urlPath); - if (fs.exists(crawldbPath)) fs.delete(crawldbPath); + if (fs.exists(urlPath)) fs.delete(urlPath, false); + if (fs.exists(crawldbPath)) fs.delete(crawldbPath, true); } protected void tearDown() throws IOException{ - fs.delete(testdir); + fs.delete(testdir, true); } public void testInject() throws IOException { Modified: lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestLinkDbMerger.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestLinkDbMerger.java?rev=678533&r1=678532&r2=678533&view=diff ============================================================================== --- lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestLinkDbMerger.java (original) +++ lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestLinkDbMerger.java Mon Jul 21 12:20:21 2008 @@ -95,7 +95,7 @@ public void tearDown() { try { if (fs.exists(testDir)) - fs.delete(testDir); + fs.delete(testDir, true); } catch (Exception e) { } try { reader.close(); @@ -136,7 +136,7 @@ } } reader.close(); - fs.delete(testDir); + fs.delete(testDir, true); } private void createLinkDb(Configuration config, FileSystem fs, Path linkdb, TreeMap init) throws Exception { Modified: lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestMapWritable.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestMapWritable.java?rev=678533&r1=678532&r2=678533&view=diff ============================================================================== --- lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestMapWritable.java (original) +++ lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestMapWritable.java Mon Jul 21 12:20:21 2008 @@ -105,7 +105,7 @@ public void testPerformance() throws Exception { FileSystem fs = FileSystem.get(configuration); Path file = new Path(System.getProperty("java.io.tmpdir"), "mapTestFile"); - fs.delete(file); + fs.delete(file, false); org.apache.hadoop.io.SequenceFile.Writer writer = SequenceFile.createWriter( fs, configuration, file, IntWritable.class, MapWritable.class); // write map @@ -164,7 +164,7 @@ } needed = System.currentTimeMillis() - start; System.out.println("needed time for reading Text: " + needed); - fs.delete(file); + fs.delete(file, false); } /** Utility method for testing writables, from hadoop code */ Modified: lucene/nutch/trunk/src/test/org/apache/nutch/fetcher/TestFetcher.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/test/org/apache/nutch/fetcher/TestFetcher.java?rev=678533&r1=678532&r2=678533&view=diff ============================================================================== --- lucene/nutch/trunk/src/test/org/apache/nutch/fetcher/TestFetcher.java (original) +++ lucene/nutch/trunk/src/test/org/apache/nutch/fetcher/TestFetcher.java Mon Jul 21 12:20:21 2008 @@ -59,7 +59,7 @@ protected void setUp() throws Exception{ conf=CrawlDBTestUtil.createConfiguration(); fs=FileSystem.get(conf); - fs.delete(testdir); + fs.delete(testdir, true); urlPath=new Path(testdir,"urls"); crawldbPath=new Path(testdir,"crawldb"); segmentsPath=new Path(testdir,"segments"); @@ -69,7 +69,7 @@ protected void tearDown() throws InterruptedException, IOException{ server.stop(); - fs.delete(testdir); + fs.delete(testdir, true); } public void testFetch() throws IOException { Modified: lucene/nutch/trunk/src/test/org/apache/nutch/indexer/TestDeleteDuplicates.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/test/org/apache/nutch/indexer/TestDeleteDuplicates.java?rev=678533&r1=678532&r2=678533&view=diff ============================================================================== --- lucene/nutch/trunk/src/test/org/apache/nutch/indexer/TestDeleteDuplicates.java (original) +++ lucene/nutch/trunk/src/test/org/apache/nutch/indexer/TestDeleteDuplicates.java Mon Jul 21 12:20:21 2008 @@ -110,7 +110,7 @@ } public void tearDown() throws Exception { - fs.delete(root); + fs.delete(root, true); } private void hashDuplicatesHelper(Path index, String url) throws Exception { Modified: lucene/nutch/trunk/src/test/org/apache/nutch/searcher/TestDistributedSearch.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/test/org/apache/nutch/searcher/TestDistributedSearch.java?rev=678533&r1=678532&r2=678533&view=diff ============================================================================== --- lucene/nutch/trunk/src/test/org/apache/nutch/searcher/TestDistributedSearch.java (original) +++ lucene/nutch/trunk/src/test/org/apache/nutch/searcher/TestDistributedSearch.java Mon Jul 21 12:20:21 2008 @@ -139,6 +139,6 @@ server2.stop(); } - fs.delete(testServersPath); + fs.delete(testServersPath, true); } }