Author: cutting
Date: Wed Jun 28 14:54:53 2006
New Revision: 417884

URL: http://svn.apache.org/viewvc?rev=417884&view=rev
Log:
NUTCH-312. Upgrade to Hadoop 0.4.0.
Added: lucene/nutch/trunk/lib/commons-cli-2.0-SNAPSHOT.jar (with props) lucene/nutch/trunk/lib/hadoop-0.4.0.jar (with props) Removed: lucene/nutch/trunk/lib/hadoop-0.3.2.jar Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java Added: lucene/nutch/trunk/lib/commons-cli-2.0-SNAPSHOT.jar URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/commons-cli-2.0-SNAPSHOT.jar?rev=417884&view=auto ============================================================================== Binary file - no diff available. Propchange: lucene/nutch/trunk/lib/commons-cli-2.0-SNAPSHOT.jar ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: lucene/nutch/trunk/lib/hadoop-0.4.0.jar URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/hadoop-0.4.0.jar?rev=417884&view=auto ============================================================================== Binary file - no diff available. 
Propchange: lucene/nutch/trunk/lib/hadoop-0.4.0.jar ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java?rev=417884&r1=417883&r2=417884&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java Wed Jun 28 14:54:53 2006 @@ -31,6 +31,7 @@ import org.apache.hadoop.mapred.RecordWriter; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.util.Progressable; import org.apache.nutch.parse.ParseOutputFormat; import org.apache.nutch.protocol.Content; @@ -45,7 +46,8 @@ public RecordWriter getRecordWriter(final FileSystem fs, final JobConf job, - final String name) throws IOException { + final String name, + final Progressable progress) throws IOException { final Path fetch = new Path(new Path(job.getOutputPath(), CrawlDatum.FETCH_DIR_NAME), name); @@ -66,7 +68,7 @@ } if (Fetcher.isParsing(job)) { - parseOut = new ParseOutputFormat().getRecordWriter(fs, job, name); + parseOut = new ParseOutputFormat().getRecordWriter(fs, job, name, null); } } Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java?rev=417884&r1=417883&r2=417884&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java Wed Jun 28 14:54:53 2006 @@ -26,6 +26,7 
@@ import org.apache.hadoop.fs.*; import org.apache.hadoop.conf.*; import org.apache.hadoop.mapred.*; +import org.apache.hadoop.util.Progressable; import org.apache.nutch.util.NutchConfiguration; import org.apache.nutch.util.NutchJob; @@ -276,7 +277,8 @@ /** Write nothing. */ public RecordWriter getRecordWriter(final FileSystem fs, final JobConf job, - final String name) throws IOException { + final String name, + final Progressable progress) throws IOException { return new RecordWriter() { public void write(WritableComparable key, Writable value) throws IOException { Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java?rev=417884&r1=417883&r2=417884&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java Wed Jun 28 14:54:53 2006 @@ -27,6 +27,7 @@ import org.apache.hadoop.fs.*; import org.apache.hadoop.conf.*; import org.apache.hadoop.mapred.*; +import org.apache.hadoop.util.Progressable; import org.apache.nutch.parse.*; import org.apache.nutch.analysis.*; @@ -79,7 +80,7 @@ public static class OutputFormat extends org.apache.hadoop.mapred.OutputFormatBase { public RecordWriter getRecordWriter(final FileSystem fs, JobConf job, - String name) throws IOException { + String name, Progressable progress) throws IOException { final Path perm = new Path(job.getOutputPath(), name); final Path temp = job.getLocalPath("index/_"+Integer.toString(new Random().nextInt())); Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java?rev=417884&r1=417883&r2=417884&view=diff 
============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java Wed Jun 28 14:54:53 2006 @@ -31,6 +31,7 @@ import org.apache.nutch.net.*; import java.io.*; +import org.apache.hadoop.util.Progressable; /* Parse content in a segment. */ public class ParseOutputFormat implements OutputFormat { @@ -46,7 +47,7 @@ } public RecordWriter getRecordWriter(FileSystem fs, JobConf job, - String name) throws IOException { + String name, Progressable progress) throws IOException { this.urlNormalizer = new UrlNormalizerFactory(job).getNormalizer(); this.filters = new URLFilters(job); Modified: lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java?rev=417884&r1=417883&r2=417884&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java Wed Jun 28 14:54:53 2006 @@ -28,6 +28,7 @@ import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.io.*; import org.apache.hadoop.mapred.*; +import org.apache.hadoop.util.Progressable; import org.apache.nutch.crawl.CrawlDatum; import org.apache.nutch.crawl.Generator; import org.apache.nutch.fetcher.Fetcher; @@ -168,7 +169,7 @@ public static class SegmentOutputFormat extends org.apache.hadoop.mapred.OutputFormatBase { private static final String DEFAULT_SLICE = "default"; - public RecordWriter getRecordWriter(final FileSystem fs, final JobConf job, final String name) throws IOException { + public RecordWriter getRecordWriter(final FileSystem fs, final JobConf job, final String name, final Progressable progress) throws IOException { return new 
RecordWriter() { MapFile.Writer c_out = null; MapFile.Writer f_out = null; Modified: lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java?rev=417884&r1=417883&r2=417884&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java Wed Jun 28 14:54:53 2006 @@ -28,6 +28,7 @@ import org.apache.hadoop.fs.*; import org.apache.hadoop.io.*; import org.apache.hadoop.mapred.*; +import org.apache.hadoop.util.Progressable; import org.apache.nutch.crawl.CrawlDatum; import org.apache.nutch.parse.ParseData; import org.apache.nutch.parse.ParseText; @@ -70,7 +71,7 @@ /** Implements a text output format */ public static class TextOutputFormat extends org.apache.hadoop.mapred.OutputFormatBase { - public RecordWriter getRecordWriter(final FileSystem fs, JobConf job, String name) throws IOException { + public RecordWriter getRecordWriter(final FileSystem fs, JobConf job, String name, Progressable progress) throws IOException { final Path segmentDumpFile = new Path(job.getOutputPath(), name);