Author: cutting Date: Tue Jan 9 13:36:24 2007 New Revision: 494602 URL: http://svn.apache.org/viewvc?view=rev&rev=494602 Log: HADOOP-868. Decrease the number of files opened during map. Contributed by Devaraj.
Modified: lucene/hadoop/trunk/CHANGES.txt lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapTask.java Modified: lucene/hadoop/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=494602&r1=494601&r2=494602 ============================================================================== --- lucene/hadoop/trunk/CHANGES.txt (original) +++ lucene/hadoop/trunk/CHANGES.txt Tue Jan 9 13:36:24 2007 @@ -23,6 +23,9 @@ 7. HADOOP-871. Fix a bug in bin/hadoop setting JAVA_LIBRARY_PATH. (Arun C Murthy via cutting) + 8. HADOOP-868. Decrease the number of open files during map, + respecting io.sort.factor. (Devaraj Das via cutting) + Release 0.10.0 - 2007-01-05 Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapTask.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapTask.java?view=diff&rev=494602&r1=494601&r2=494602 ============================================================================== --- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapTask.java (original) +++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapTask.java Tue Jan 9 13:36:24 2007 @@ -467,26 +467,24 @@ { Path [] filename = new Path[numSpills]; Path [] indexFileName = new Path[numSpills]; - FSDataInputStream in[] = new FSDataInputStream[numSpills]; - FSDataInputStream indexIn[] = new FSDataInputStream[numSpills]; for(int i = 0; i < numSpills; i++) { filename[i] = mapOutputFile.getSpillFile(getTaskId(), i); - in[i] = localFs.open(filename[i]); indexFileName[i] = mapOutputFile.getSpillIndexFile(getTaskId(), i); - indexIn[i] = localFs.open(indexFileName[i]); } //create a sorter object as we need access to the SegmentDescriptor //class and merge methods Sorter sorter = new Sorter(localFs, keyClass, valClass, job); - sorter.setFactor(numSpills); for (int parts = 0; parts < partitions; parts++){ List<SegmentDescriptor> segmentList = new ArrayList(numSpills); for(int i = 0; i < numSpills; i++) { - long segmentOffset = indexIn[i].readLong(); - long segmentLength = indexIn[i].readLong(); + FSDataInputStream indexIn = localFs.open(indexFileName[i]); + indexIn.seek(parts * 16); + long segmentOffset = indexIn.readLong(); + long segmentLength = indexIn.readLong(); + indexIn.close(); SegmentDescriptor s = sorter.new SegmentDescriptor(segmentOffset, segmentLength, filename[i]); s.preserveInput(true); @@ -513,8 +511,8 @@ finalIndexOut.close(); //cleanup for(int i = 0; i < numSpills; i++) { - in[i].close(); localFs.delete(filename[i]); - indexIn[i].close(); localFs.delete(indexFileName[i]); + localFs.delete(filename[i]); + localFs.delete(indexFileName[i]); } } }