Author: vinodkv Date: Fri May 24 00:42:53 2013 New Revision: 1485908 URL: http://svn.apache.org/r1485908 Log: MAPREDUCE-5230. Bring back NLineInputFormat.createFileSplit for binary compatibility with mapred in 1.x. Contributed by Mayank Bansal. svn merge --ignore-ancestry -c 1485906 ../../trunk/
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/NLineInputFormat.java Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt?rev=1485908&r1=1485907&r2=1485908&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt (original) +++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt Fri May 24 00:42:53 2013 @@ -93,6 +93,9 @@ Release 2.0.5-beta - UNRELEASED MAPREDUCE-5246. Specify application-type at the time of job submission after YARN-563. (Mayank Bansal via vinodkv) + MAPREDUCE-5230. Bring back NLineInputFormat.createFileSplit for binary + compatibility with mapred in 1.x (Mayank Bansal via vinodkv) + OPTIMIZATIONS MAPREDUCE-4974. 
Optimising the LineRecordReader initialize() method Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/NLineInputFormat.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/NLineInputFormat.java?rev=1485908&r1=1485907&r2=1485908&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/NLineInputFormat.java (original) +++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/NLineInputFormat.java Fri May 24 00:42:53 2013 @@ -24,6 +24,7 @@ import java.util.ArrayList; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.FileInputFormat; @@ -90,4 +91,21 @@ public class NLineInputFormat extends Fi public void configure(JobConf conf) { N = conf.getInt("mapreduce.input.lineinputformat.linespermap", 1); } + + /** + * NLineInputFormat uses LineRecordReader, which always reads + * (and consumes) at least one character out of its upper split + * boundary. So to make sure that each mapper gets N lines, we + * move back the upper split limits of each split + * by one character here. 
+ * @param fileName Path of file + * @param begin the position of the first byte in the file to process + * @param length number of bytes in InputSplit + * @return FileSplit + */ + protected static FileSplit createFileSplit(Path fileName, long begin, long length) { + return (begin == 0) + ? new FileSplit(fileName, begin, length - 1, new String[] {}) + : new FileSplit(fileName, begin - 1, length, new String[] {}); + } }