Author: cutting
Date: Mon Jun 18 15:26:59 2007
New Revision: 548512

URL: http://svn.apache.org/viewvc?view=rev&rev=548512
Log:
HADOOP-1442.  Fix handling of zero-length input splits.  Contributed by Senthil.
Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/FileInputFormat.java
    lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextInputFormat.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=548512&r1=548511&r2=548512
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Mon Jun 18 15:26:59 2007
@@ -173,6 +173,9 @@
      mismatch during datanode registration.
      (Konstantin Shvachko via cutting)
 
+ 54. HADOOP-1442.  Fix handling of zero-length input splits.
+     (Senthil Subramanian via cutting)
+
 
 Release 0.13.0 - 2007-06-08

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/FileInputFormat.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/FileInputFormat.java?view=diff&rev=548512&r1=548511&r2=548512
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/FileInputFormat.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/FileInputFormat.java Mon Jun 18 15:26:59 2007
@@ -167,7 +167,7 @@
       Path file = files[i];
       FileSystem fs = file.getFileSystem(job);
       long length = fs.getLength(file);
-      if (isSplitable(fs, file)) {
+      if ((length != 0) && isSplitable(fs, file)) {
         long blockSize = fs.getBlockSize(file);
         long splitSize = computeSplitSize(goalSize, minSize, blockSize);
@@ -183,9 +183,7 @@
                                      bytesRemaining, job));
         }
       } else {
-        if (length != 0) {
-          splits.add(new FileSplit(file, 0, length, job));
-        }
+        splits.add(new FileSplit(file, 0, length, job));
       }
     }
     LOG.debug("Total # of splits: " + splits.size());

Modified: lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextInputFormat.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextInputFormat.java?view=diff&rev=548512&r1=548511&r2=548512
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextInputFormat.java (original)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextInputFormat.java Mon Jun 18 15:26:59 2007
@@ -89,6 +89,12 @@
       InputSplit[] splits = format.getSplits(job, numSplits);
       LOG.debug("splitting: got = " + splits.length);
 
+      if (length == 0) {
+        assertEquals("Files of length 0 are not returned from FileInputFormat.getSplits().",
+                     1, splits.length);
+        assertEquals("Empty file length == 0", 0, splits[0].getLength());
+      }
+
       // check each split
       BitSet bits = new BitSet(length);
       for (int j = 0; j < splits.length; j++) {
@@ -224,6 +230,25 @@
                  results.get(0).toString());
     assertEquals("splits[1][1]", "of gzip",
                  results.get(1).toString());
+  }
+
+  /**
+   * Test using the gzip codec and an empty input file
+   */
+  public static void testGzipEmpty() throws IOException {
+    JobConf job = new JobConf();
+    CompressionCodec gzip = new GzipCodec();
+    ReflectionUtils.setConf(gzip, job);
+    localFs.delete(workDir);
+    writeFile(localFs, new Path(workDir, "empty.gz"), gzip, "");
+    job.setInputPath(workDir);
+    TextInputFormat format = new TextInputFormat();
+    format.configure(job);
+    InputSplit[] splits = format.getSplits(job, 100);
+    assertEquals("Compressed files of length 0 are not returned from FileInputFormat.getSplits().",
+                 1, splits.length);
+    List<Text> results = readSplit(format, splits[0], job);
+    assertEquals("Compressed empty file length == 0", 0, results.size());
   }
 
   public static void main(String[] args) throws Exception {
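
Note (not part of the committed patch): a minimal sketch of the behavior this change establishes. After HADOOP-1442, FileInputFormat.getSplits() returns exactly one zero-length split for an empty input file instead of dropping it. The class name and scratch directory below are illustrative assumptions; the calls used are either shown in the patch above (JobConf, setInputPath, TextInputFormat.configure, getSplits, InputSplit.getLength) or standard FileSystem methods.

    import java.io.IOException;

    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.InputSplit;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.TextInputFormat;

    /** Illustrative only: shows split generation for an empty input file. */
    public class EmptySplitSketch {
      public static void main(String[] args) throws IOException {
        JobConf job = new JobConf();
        FileSystem fs = FileSystem.getLocal(job);

        // Hypothetical scratch directory holding a single zero-length file.
        Path dir = new Path("build/test/empty-split-sketch");
        fs.delete(dir);
        fs.create(new Path(dir, "empty.txt")).close();

        job.setInputPath(dir);
        TextInputFormat format = new TextInputFormat();
        format.configure(job);

        // With this fix, the empty file yields one split of length 0;
        // before the fix it yielded no split at all.
        InputSplit[] splits = format.getSplits(job, 1);
        System.out.println("splits = " + splits.length
            + ", splits[0].getLength() = " + splits[0].getLength());
      }
    }

Run against a build that includes this patch, the sketch should print "splits = 1, splits[0].getLength() = 0".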