Author: dogacan
Date: Wed Sep 24 01:52:19 2008
New Revision: 698471

URL: http://svn.apache.org/viewvc?rev=698471&view=rev
Log:
NUTCH-653 - Upgrade to hadoop 0.18
Added: lucene/nutch/trunk/lib/hadoop-0.18.1-core.jar (with props) lucene/nutch/trunk/lib/jets3t-0.6.0.jar (with props) lucene/nutch/trunk/lib/log4j-1.2.15.jar (with props) Removed: lucene/nutch/trunk/lib/hadoop-0.17.1-core.jar lucene/nutch/trunk/lib/jets3t-0.5.0.jar lucene/nutch/trunk/lib/log4j-1.2.13.jar Modified: lucene/nutch/trunk/CHANGES.txt lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.a lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.so lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.so.1 lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.so.1.0.0 lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.a lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.so lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.so.1 lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.so.1.0.0 lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java Modified: lucene/nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=698471&r1=698470&r2=698471&view=diff ============================================================================== --- lucene/nutch/trunk/CHANGES.txt (original) +++ lucene/nutch/trunk/CHANGES.txt Wed Sep 24 01:52:19 2008 @@ -274,6 +274,8 @@ 100. NUTCH-633 - ParseSegment no longer allow reparsing. (dogacan) +101. NUTCH-653 - Upgrade to hadoop 0.18. (dogacan) + Release 0.9 - 2007-04-02 1. Changed log4j confiquration to log to stdout on commandline Added: lucene/nutch/trunk/lib/hadoop-0.18.1-core.jar URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/hadoop-0.18.1-core.jar?rev=698471&view=auto ============================================================================== Binary file - no diff available. 
Propchange: lucene/nutch/trunk/lib/hadoop-0.18.1-core.jar ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: lucene/nutch/trunk/lib/jets3t-0.6.0.jar URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/jets3t-0.6.0.jar?rev=698471&view=auto ============================================================================== Binary file - no diff available. Propchange: lucene/nutch/trunk/lib/jets3t-0.6.0.jar ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: lucene/nutch/trunk/lib/log4j-1.2.15.jar URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/log4j-1.2.15.jar?rev=698471&view=auto ============================================================================== Binary file - no diff available. Propchange: lucene/nutch/trunk/lib/log4j-1.2.15.jar ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Modified: lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.a URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.a?rev=698471&r1=698470&r2=698471&view=diff ============================================================================== Binary files - no diff available. Modified: lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.so URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.so?rev=698471&r1=698470&r2=698471&view=diff ============================================================================== Binary files - no diff available. Modified: lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.so.1 URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.so.1?rev=698471&r1=698470&r2=698471&view=diff ============================================================================== Binary files - no diff available. 
Modified: lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.so.1.0.0 URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.so.1.0.0?rev=698471&r1=698470&r2=698471&view=diff ============================================================================== Binary files - no diff available. Modified: lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.a URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.a?rev=698471&r1=698470&r2=698471&view=diff ============================================================================== Binary files - no diff available. Modified: lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.so URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.so?rev=698471&r1=698470&r2=698471&view=diff ============================================================================== Binary files - no diff available. Modified: lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.so.1 URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.so.1?rev=698471&r1=698470&r2=698471&view=diff ============================================================================== Binary files - no diff available. Modified: lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.so.1.0.0 URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.so.1.0.0?rev=698471&r1=698470&r2=698471&view=diff ============================================================================== Binary files - no diff available. 
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java?rev=698471&r1=698470&r2=698471&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java Wed Sep 24 01:52:19 2008 @@ -33,9 +33,7 @@ import org.apache.hadoop.io.MapFile; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.UTF8; import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.io.SequenceFile.CompressionType; import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.FileOutputFormat; @@ -45,7 +43,6 @@ import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.Mapper; import org.apache.hadoop.mapred.OutputCollector; -import org.apache.hadoop.mapred.OutputFormatBase; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.RecordWriter; import org.apache.hadoop.mapred.Reducer; @@ -135,42 +132,52 @@ SequenceFileInputFormat<Text, MetaWrapper> { @Override - public RecordReader<Text, MetaWrapper> getRecordReader(InputSplit split, - JobConf job, Reporter reporter) { + public RecordReader<Text, MetaWrapper> getRecordReader(final InputSplit split, + final JobConf job, Reporter reporter) throws IOException { reporter.setStatus(split.toString()); - + // find part name SegmentPart segmentPart; final String spString; + final FileSplit fSplit = (FileSplit) split; try { - segmentPart = SegmentPart.get((FileSplit) split); + segmentPart = SegmentPart.get(fSplit); spString = segmentPart.toString(); } catch (IOException e) { throw new RuntimeException("Cannot identify segment:", e); } + + final SequenceFile.Reader reader = + new 
SequenceFile.Reader(FileSystem.get(job), fSplit.getPath(), job); + + final Writable w; + try { + w = (Writable) reader.getValueClass().newInstance(); + } catch (Exception e) { + throw new IOException(e.toString()); + } try { - return new SequenceFileRecordReader(job, (FileSplit)split) { + return new SequenceFileRecordReader<Text, MetaWrapper>(job, fSplit) { @Override - public synchronized boolean next(WritableComparable key, Writable value) throws IOException { + public synchronized boolean next(Text key, MetaWrapper wrapper) throws IOException { LOG.debug("Running OIF.next()"); - - MetaWrapper wrapper = (MetaWrapper) value; - try { - wrapper.set((Writable)getValueClass().newInstance()); - } catch (Exception e) { - throw new IOException(e.toString()); - } - boolean res = super.next(key, (Writable) wrapper.get()); + boolean res = reader.next(key, w); + wrapper.set(w); wrapper.setMeta(SEGMENT_PART_KEY, spString); return res; } @Override - public Writable createValue() { + public synchronized void close() throws IOException { + reader.close(); + } + + @Override + public MetaWrapper createValue() { return new MetaWrapper(); }