Author: ab
Date: Thu May 31 14:23:45 2007
New Revision: 543264

URL: http://svn.apache.org/viewvc?view=rev&rev=543264
Log:
NUTCH-392 - OutputFormat implementations should pass on Progressable.

Modified: lucene/nutch/trunk/CHANGES.txt lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java Modified: lucene/nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?view=diff&rev=543264&r1=543263&r2=543264 ============================================================================== --- lucene/nutch/trunk/CHANGES.txt (original) +++ lucene/nutch/trunk/CHANGES.txt Thu May 31 14:23:45 2007 @@ -26,6 +26,10 @@ 9. NUTCH-61 - Support for adaptive re-fetch interval and detection of unmodified content. (ab) + +10. NUTCH-392 - OutputFormat implementations should pass on Progressable. + (cutting via ab) + Release 0.9 - 2007-04-02 Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java?view=diff&rev=543264&r1=543263&r2=543264 ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java Thu May 31 14:23:45 2007 @@ -28,6 +28,7 @@ import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.SequenceFile.CompressionType; import org.apache.hadoop.mapred.OutputFormat; import org.apache.hadoop.mapred.RecordWriter; @@ -58,7 +59,8 @@ new Path(new Path(job.getOutputPath(), Content.DIR_NAME), name); final MapFile.Writer fetchOut = - new MapFile.Writer(job, fs, fetch.toString(), Text.class, CrawlDatum.class); + new 
MapFile.Writer(job, fs, fetch.toString(), Text.class, CrawlDatum.class, + CompressionType.NONE, progress); return new RecordWriter() { private MapFile.Writer contentOut; @@ -67,11 +69,12 @@ { if (Fetcher.isStoringContent(job)) { contentOut = new MapFile.Writer(job, fs, content.toString(), - Text.class, Content.class); + Text.class, Content.class, + CompressionType.NONE, progress); } if (Fetcher.isParsing(job)) { - parseOut = new ParseOutputFormat().getRecordWriter(fs, job, name, null); + parseOut = new ParseOutputFormat().getRecordWriter(fs, job, name, progress); } } Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java?view=diff&rev=543264&r1=543263&r2=543264 ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java Thu May 31 14:23:45 2007 @@ -60,7 +60,7 @@ public static class OutputFormat extends org.apache.hadoop.mapred.OutputFormatBase { public RecordWriter getRecordWriter(final FileSystem fs, JobConf job, - String name, Progressable progress) throws IOException { + String name, final Progressable progress) throws IOException { final Path perm = new Path(job.getOutputPath(), name); final Path temp = job.getLocalPath("index/_"+Integer.toString(new Random().nextInt())); @@ -95,6 +95,7 @@ " (" + doc.get("lang") + ")"); } writer.addDocument(doc, analyzer); + progress.progress(); } public void close(final Reporter reporter) throws IOException { Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java?view=diff&rev=543264&r1=543263&r2=543264 ============================================================================== --- 
lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java Thu May 31 14:23:45 2007 @@ -68,13 +68,16 @@ new Path(new Path(job.getOutputPath(), CrawlDatum.PARSE_DIR_NAME), name); final MapFile.Writer textOut = - new MapFile.Writer(job, fs, text.toString(), Text.class, ParseText.class, CompressionType.RECORD); + new MapFile.Writer(job, fs, text.toString(), Text.class, ParseText.class, + CompressionType.RECORD, progress); final MapFile.Writer dataOut = - new MapFile.Writer(job, fs, data.toString(), Text.class,ParseData.class); + new MapFile.Writer(job, fs, data.toString(), Text.class, ParseData.class, + CompressionType.RECORD, progress); final SequenceFile.Writer crawlOut = - SequenceFile.createWriter(fs, job, crawl, Text.class, CrawlDatum.class); + SequenceFile.createWriter(fs, job, crawl, Text.class, CrawlDatum.class, + CompressionType.NONE, progress); return new RecordWriter() { Modified: lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java?view=diff&rev=543264&r1=543263&r2=543264 ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java Thu May 31 14:23:45 2007 @@ -36,6 +36,7 @@ import org.apache.hadoop.io.UTF8; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.io.SequenceFile.CompressionType; import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobClient; @@ -237,7 +238,7 @@ } else { wname = new Path(new Path(new Path(job.getOutputPath(), segmentName + "-" + slice), dirName), name); } - res = new SequenceFile.Writer(fs, job, wname, 
Text.class, CrawlDatum.class); + res = new SequenceFile.Writer(fs, job, wname, Text.class, CrawlDatum.class, progress, new SequenceFile.Metadata()); sliceWriters.put(slice + dirName, res); return res; } @@ -253,7 +254,7 @@ } else { wname = new Path(new Path(new Path(job.getOutputPath(), segmentName + "-" + slice), dirName), name); } - res = new MapFile.Writer(job, fs, wname.toString(), Text.class, clazz); + res = new MapFile.Writer(job, fs, wname.toString(), Text.class, clazz, CompressionType.RECORD, progress); sliceWriters.put(slice + dirName, res); return res; } Modified: lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java?view=diff&rev=543264&r1=543263&r2=543264 ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java Thu May 31 14:23:45 2007 @@ -94,7 +94,7 @@ /** Implements a text output format */ public static class TextOutputFormat extends org.apache.hadoop.mapred.OutputFormatBase { - public RecordWriter getRecordWriter(final FileSystem fs, JobConf job, String name, Progressable progress) throws IOException { + public RecordWriter getRecordWriter(final FileSystem fs, JobConf job, String name, final Progressable progress) throws IOException { final Path segmentDumpFile = new Path(job.getOutputPath(), name); ------------------------------------------------------------------------- This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ _______________________________________________ Nutch-cvs mailing list Nutch-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/nutch-cvs