NLineInputFormat.java

vinodkv Thu, 23 May 2013 17:43:18 -0700

Author: vinodkv
Date: Fri May 24 00:42:53 2013
New Revision: 1485908

URL: http://svn.apache.org/r1485908
Log:
MAPREDUCE-5230. Bring back NLineInputFormat.createFileSplit for binary 
compatibility with mapred in 1.x Contributed by Mayank Bansal.
svn merge --ignore-ancestry -c 1485906 ../../trunk/


Modified:
    hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt
    
hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/NLineInputFormat.java

Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt?rev=1485908&r1=1485907&r2=1485908&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt 
(original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt Fri 
May 24 00:42:53 2013
@@ -93,6 +93,9 @@ Release 2.0.5-beta - UNRELEASED
     MAPREDUCE-5246. Specify application-type at the time of job submission 
after
     YARN-563. (Mayank Bansal via vinodkv)
 
+    MAPREDUCE-5230. Bring back NLineInputFormat.createFileSplit for binary
+    compatibility with mapred in 1.x (Mayank Bansal via vinodkv)
+
   OPTIMIZATIONS
 
     MAPREDUCE-4974. Optimising the LineRecordReader initialize() method 

Modified: 
hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/NLineInputFormat.java
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/NLineInputFormat.java?rev=1485908&r1=1485907&r2=1485908&view=diff
==============================================================================
--- 
hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/NLineInputFormat.java
 (original)
+++ 
hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/NLineInputFormat.java
 Fri May 24 00:42:53 2013
@@ -24,6 +24,7 @@ import java.util.ArrayList;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.FileInputFormat;
@@ -90,4 +91,21 @@ public class NLineInputFormat extends Fi
   public void configure(JobConf conf) {
     N = conf.getInt("mapreduce.input.lineinputformat.linespermap", 1);
   }
+  
+  /**
+   * NLineInputFormat uses LineRecordReader, which always reads
+   * (and consumes) at least one character out of its upper split
+   * boundary. So to make sure that each mapper gets N lines, we
+   * move back the upper split limits of each split 
+   * by one character here.
+   * @param fileName  Path of file
+   * @param begin  the position of the first byte in the file to process
+   * @param length  number of bytes in InputSplit
+   * @return  FileSplit
+   */
+  protected static FileSplit createFileSplit(Path fileName, long begin, long 
length) {
+    return (begin == 0) 
+    ? new FileSplit(fileName, begin, length - 1, new String[] {})
+    : new FileSplit(fileName, begin - 1, length, new String[] {});
+  }
 }

svn commit: r1485908 - in /hadoop/common/branches/branch-2/hadoop-mapreduce-project: CHANGES.txt hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/NLineInputFormat.java

Reply via email to