Author: dogacan
Date: Wed Sep 24 01:52:19 2008
New Revision: 698471

URL: http://svn.apache.org/viewvc?rev=698471&view=rev
Log:
NUTCH-653 - Upgrade to hadoop 0.18

Added:
    lucene/nutch/trunk/lib/hadoop-0.18.1-core.jar   (with props)
    lucene/nutch/trunk/lib/jets3t-0.6.0.jar   (with props)
    lucene/nutch/trunk/lib/log4j-1.2.15.jar   (with props)
Removed:
    lucene/nutch/trunk/lib/hadoop-0.17.1-core.jar
    lucene/nutch/trunk/lib/jets3t-0.5.0.jar
    lucene/nutch/trunk/lib/log4j-1.2.13.jar
Modified:
    lucene/nutch/trunk/CHANGES.txt
    lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.a
    lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.so
    lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.so.1
    lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.so.1.0.0
    lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.a
    lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.so
    lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.so.1
    lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.so.1.0.0
    lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java

Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=698471&r1=698470&r2=698471&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Wed Sep 24 01:52:19 2008
@@ -274,6 +274,8 @@
 100. NUTCH-633 - ParseSegment no longer allow reparsing.
      (dogacan)
 
+101. NUTCH-653 - Upgrade to hadoop 0.18. (dogacan)
+
 Release 0.9 - 2007-04-02
 
  1. Changed log4j confiquration to log to stdout on commandline

Added: lucene/nutch/trunk/lib/hadoop-0.18.1-core.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/hadoop-0.18.1-core.jar?rev=698471&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/lib/hadoop-0.18.1-core.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/lib/jets3t-0.6.0.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/jets3t-0.6.0.jar?rev=698471&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/lib/jets3t-0.6.0.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/lib/log4j-1.2.15.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/log4j-1.2.15.jar?rev=698471&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/lib/log4j-1.2.15.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Modified: lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.a
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.a?rev=698471&r1=698470&r2=698471&view=diff
==============================================================================
Binary files - no diff available.

Modified: lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.so
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.so?rev=698471&r1=698470&r2=698471&view=diff
==============================================================================
Binary files - no diff available.

Modified: lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.so.1
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.so.1?rev=698471&r1=698470&r2=698471&view=diff
==============================================================================
Binary files - no diff available.

Modified: lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.so.1.0.0
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.so.1.0.0?rev=698471&r1=698470&r2=698471&view=diff
==============================================================================
Binary files - no diff available.

Modified: lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.a
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.a?rev=698471&r1=698470&r2=698471&view=diff
==============================================================================
Binary files - no diff available.

Modified: lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.so
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.so?rev=698471&r1=698470&r2=698471&view=diff
==============================================================================
Binary files - no diff available.

Modified: lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.so.1
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.so.1?rev=698471&r1=698470&r2=698471&view=diff
==============================================================================
Binary files - no diff available.

Modified: lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.so.1.0.0
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.so.1.0.0?rev=698471&r1=698470&r2=698471&view=diff
==============================================================================
Binary files - no diff available.

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java?rev=698471&r1=698470&r2=698471&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java Wed Sep 24 01:52:19 2008
@@ -33,9 +33,7 @@
 import org.apache.hadoop.io.MapFile;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.UTF8;
 import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.io.SequenceFile.CompressionType;
 import org.apache.hadoop.mapred.FileInputFormat;
 import org.apache.hadoop.mapred.FileOutputFormat;
@@ -45,7 +43,6 @@
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.Mapper;
 import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.OutputFormatBase;
 import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.RecordWriter;
 import org.apache.hadoop.mapred.Reducer;
@@ -135,42 +132,52 @@
     SequenceFileInputFormat<Text, MetaWrapper> {
     
     @Override
-    public RecordReader<Text, MetaWrapper> getRecordReader(InputSplit split,
-        JobConf job, Reporter reporter) {
+    public RecordReader<Text, MetaWrapper> getRecordReader(final InputSplit split,
+        final JobConf job, Reporter reporter) throws IOException {
 
       reporter.setStatus(split.toString());
-
+      
       // find part name
       SegmentPart segmentPart;
       final String spString;
+      final FileSplit fSplit = (FileSplit) split;
       try {
-        segmentPart = SegmentPart.get((FileSplit) split);
+        segmentPart = SegmentPart.get(fSplit);
         spString = segmentPart.toString();
       } catch (IOException e) {
         throw new RuntimeException("Cannot identify segment:", e);
       }
+      
+      final SequenceFile.Reader reader =
+        new SequenceFile.Reader(FileSystem.get(job), fSplit.getPath(), job);
+      
+      final Writable w;
+      try {
+        w = (Writable) reader.getValueClass().newInstance();
+      } catch (Exception e) {
+        throw new IOException(e.toString());
+      }
 
       try {
-        return new SequenceFileRecordReader(job, (FileSplit)split) {
+        return new SequenceFileRecordReader<Text, MetaWrapper>(job, fSplit) {
           
           @Override
-          public synchronized boolean next(WritableComparable key, Writable value) throws IOException {
+          public synchronized boolean next(Text key, MetaWrapper wrapper) throws IOException {
             LOG.debug("Running OIF.next()");
-            
-            MetaWrapper wrapper = (MetaWrapper) value;
-            try {
-              wrapper.set((Writable)getValueClass().newInstance());
-            } catch (Exception e) {
-              throw new IOException(e.toString());
-            }
 
-            boolean res = super.next(key, (Writable) wrapper.get());
+            boolean res = reader.next(key, w);
+            wrapper.set(w);
             wrapper.setMeta(SEGMENT_PART_KEY, spString);
             return res;
           }
           
           @Override
-          public Writable createValue() {
+          public synchronized void close() throws IOException {
+            reader.close();
+          }
+          
+          @Override
+          public MetaWrapper createValue() {
             return new MetaWrapper();
           }
           


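The core of the SegmentMerger change is visible in getRecordReader() above: under the generified org.apache.hadoop.mapred API in Hadoop 0.18 the reader is typed as SequenceFileRecordReader<Text, MetaWrapper>, so instead of casting the framework-supplied value through Writable/WritableComparable, the patch opens its own SequenceFile.Reader, instantiates the file's real value class, and re-wraps each record (tagged with the segment part name) before handing it to the mapper. The sketch below shows that pattern in isolation. It is not the committed Nutch code: TaggedSequenceFileInputFormat, ValueWrapper, and the "part" metadata key are made-up stand-ins for SegmentMerger.ObjectInputFormat, MetaWrapper, and SEGMENT_PART_KEY; only the Hadoop classes and calls are the ones used in the patch.

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileRecordReader;

/** Stand-in for Nutch's MetaWrapper: a Writable that carries another Writable
 *  plus string metadata.  Serialization is omitted because it is irrelevant to
 *  the record-reader pattern being illustrated. */
class ValueWrapper implements Writable {
  private Writable value;
  private final Map<String, String> meta = new HashMap<String, String>();
  public void set(Writable value) { this.value = value; }
  public Writable get() { return value; }
  public void setMeta(String key, String val) { meta.put(key, val); }
  public void write(DataOutput out) throws IOException {
    throw new UnsupportedOperationException("sketch only");
  }
  public void readFields(DataInput in) throws IOException {
    throw new UnsupportedOperationException("sketch only");
  }
}

/** Illustrative input format that tags every value it reads, in the style of
 *  the updated SegmentMerger input format above. */
public class TaggedSequenceFileInputFormat
    extends SequenceFileInputFormat<Text, ValueWrapper> {

  @Override
  public RecordReader<Text, ValueWrapper> getRecordReader(final InputSplit split,
      final JobConf job, Reporter reporter) throws IOException {

    reporter.setStatus(split.toString());
    final FileSplit fSplit = (FileSplit) split;
    // Tag records with the directory name of the part being read
    // (SegmentMerger derives a SegmentPart from the split instead).
    final String tag = fSplit.getPath().getParent().getName();

    // Open our own reader so we can instantiate the file's real value class;
    // under the typed 0.18 API the framework-supplied value is already a
    // ValueWrapper, so the raw record is read and wrapped by hand.
    final SequenceFile.Reader reader =
        new SequenceFile.Reader(FileSystem.get(job), fSplit.getPath(), job);
    final Writable raw;
    try {
      raw = (Writable) reader.getValueClass().newInstance();
    } catch (Exception e) {
      throw new IOException(e.toString());
    }

    return new SequenceFileRecordReader<Text, ValueWrapper>(job, fSplit) {
      @Override
      public synchronized boolean next(Text key, ValueWrapper wrapper)
          throws IOException {
        boolean more = reader.next(key, raw); // read the raw record
        wrapper.set(raw);                     // re-wrap it for the mapper
        wrapper.setMeta("part", tag);
        return more;
      }

      @Override
      public ValueWrapper createValue() {
        return new ValueWrapper();
      }

      @Override
      public synchronized void close() throws IOException {
        reader.close();
      }
    };
  }
}

Closing the extra reader in close() matters because the wrapped SequenceFileRecordReader no longer knows about it; the patch adds the same override for the same reason.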