Author: cdouglas
Date: Wed Jan 13 10:47:01 2010
New Revision: 898713
URL: http://svn.apache.org/viewvc?rev=898713&view=rev
Log:
HADOOP-6315. Avoid incorrect use of BuiltInflater/BuiltInDeflater in
GzipCodec. Contributed by Aaron Kimball
Modified:
hadoop/common/branches/branch-0.20/CHANGES.txt
hadoop/common/branches/branch-0.20/src/core/org/apache/hadoop/io/compress/GzipCodec.java
hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/compress/TestCodec.java
Modified: hadoop/common/branches/branch-0.20/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/CHANGES.txt?rev=898713&r1=898712&r2=898713&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.20/CHANGES.txt Wed Jan 13 10:47:01 2010
@@ -87,6 +87,9 @@
HADOOP-5623. Fixes a problem to do with status messages getting overwritten
in streaming jobs. (Rick Cox and Jothi Padmanabhan via tomwhite)
+ HADOOP-6315. Avoid incorrect use of BuiltInflater/BuiltInDeflater in
+ GzipCodec. (Aaron Kimball via cdouglas)
+
Release 0.20.1 - 2009-09-01
INCOMPATIBLE CHANGES
Modified:
hadoop/common/branches/branch-0.20/src/core/org/apache/hadoop/io/compress/GzipCodec.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/core/org/apache/hadoop/io/compress/GzipCodec.java?rev=898713&r1=898712&r2=898713&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20/src/core/org/apache/hadoop/io/compress/GzipCodec.java (original)
+++ hadoop/common/branches/branch-0.20/src/core/org/apache/hadoop/io/compress/GzipCodec.java Wed Jan 13 10:47:01 2010
@@ -161,7 +161,7 @@
public Class<? extends Compressor> getCompressorType() {
return ZlibFactory.isNativeZlibLoaded(conf)
? GzipZlibCompressor.class
- : BuiltInZlibDeflater.class;
+ : null;
}
public CompressionInputStream createInputStream(InputStream in)
@@ -192,7 +192,7 @@
public Class<? extends Decompressor> getDecompressorType() {
return ZlibFactory.isNativeZlibLoaded(conf)
? GzipZlibDecompressor.class
- : BuiltInZlibInflater.class;
+ : null;
}
public String getDefaultExtension() {
Modified:
hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/compress/TestCodec.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/compress/TestCodec.java?rev=898713&r1=898712&r2=898713&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/compress/TestCodec.java (original)
+++ hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/compress/TestCodec.java Wed Jan 13 10:47:01 2010
@@ -19,10 +19,21 @@
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
import java.io.DataInputStream;
import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
import java.util.Random;
+import java.util.zip.GZIPInputStream;
+import java.util.zip.GZIPOutputStream;
import junit.framework.TestCase;
@@ -41,6 +52,9 @@
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.compress.CompressionOutputStream;
+import org.apache.hadoop.io.compress.CompressorStream;
+import org.apache.hadoop.io.compress.zlib.BuiltInZlibDeflater;
+import org.apache.hadoop.io.compress.zlib.BuiltInZlibInflater;
import org.apache.hadoop.io.compress.zlib.ZlibFactory;
public class TestCodec extends TestCase {
@@ -246,4 +260,151 @@
super(name);
}
+ public void testCodecPoolAndGzipDecompressor() {
+ // BuiltInZlibInflater should not be used as the GzipCodec decompressor.
+ // Assert that this is the case.
+
+ // Don't use native libs for this test.
+ Configuration conf = new Configuration();
+ conf.setBoolean("hadoop.native.lib", false);
+ assertFalse("ZlibFactory is using native libs against request",
+ ZlibFactory.isNativeZlibLoaded(conf));
+
+ // This should give us a BuiltInZlibInflater.
+ Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);
+ assertNotNull("zlibDecompressor is null!", zlibDecompressor);
+ assertTrue("ZlibFactory returned unexpected inflator",
+ zlibDecompressor instanceof BuiltInZlibInflater);
+
+ // Asking for a decompressor directly from GzipCodec should return null;
+ // without native zlib its createInputStream() just wraps the existing
+ // stream in a java.util.zip.GZIPInputStream.
+ CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
+ CompressionCodec codec = ccf.getCodec(new Path("foo.gz"));
+ assertTrue("Codec for .gz file is not GzipCodec", codec instanceof GzipCodec);
+ Decompressor codecDecompressor = codec.createDecompressor();
+ if (null != codecDecompressor) {
+ fail("Got non-null codecDecompressor: " + codecDecompressor);
+ }
+
+ // Asking the CodecPool for a decompressor for GzipCodec
+ // should return null as well.
+ Decompressor poolDecompressor = CodecPool.getDecompressor(codec);
+ if (null != poolDecompressor) {
+ fail("Got non-null poolDecompressor: " + poolDecompressor);
+ }
+
+ // If we then ensure that the pool is populated...
+ CodecPool.returnDecompressor(zlibDecompressor);
+
+ // Asking the pool another time should still not bind this to GzipCodec.
+ poolDecompressor = CodecPool.getDecompressor(codec);
+ if (null != poolDecompressor) {
+ fail("Second time, got non-null poolDecompressor: "
+ + poolDecompressor);
+ }
+ }
+
+ public void testGzipCodecRead() throws IOException {
+ // Create a gzipped file and try to read it back, using a decompressor
+ // from the CodecPool.
+
+ // Don't use native libs for this test.
+ Configuration conf = new Configuration();
+ conf.setBoolean("hadoop.native.lib", false);
+ assertFalse("ZlibFactory is using native libs against request",
+ ZlibFactory.isNativeZlibLoaded(conf));
+
+ // Ensure that the CodecPool has a BuiltInZlibInflater in it.
+ Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);
+ assertNotNull("zlibDecompressor is null!", zlibDecompressor);
+ assertTrue("ZlibFactory returned unexpected inflator",
+ zlibDecompressor instanceof BuiltInZlibInflater);
+ CodecPool.returnDecompressor(zlibDecompressor);
+
+ // Now create a GZip text file.
+ String tmpDir = System.getProperty("test.build.data", "/tmp/");
+ Path f = new Path(new Path(tmpDir), "testGzipCodecRead.txt.gz");
+ BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
+ new GZIPOutputStream(new FileOutputStream(f.toString()))));
+ final String msg = "This is the message in the file!";
+ bw.write(msg);
+ bw.close();
+
+ // Now read it back, using the CodecPool to establish the
+ // decompressor to use.
+ CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
+ CompressionCodec codec = ccf.getCodec(f);
+ Decompressor decompressor = CodecPool.getDecompressor(codec);
+ FileSystem fs = FileSystem.getLocal(conf);
+ InputStream is = fs.open(f);
+ is = codec.createInputStream(is, decompressor);
+ BufferedReader br = new BufferedReader(new InputStreamReader(is));
+ String line = br.readLine();
+ assertEquals("Didn't get the same message back!", msg, line);
+ br.close();
+ }
+
+ private void verifyGzipFile(String filename, String msg) throws IOException {
+ BufferedReader r = new BufferedReader(new InputStreamReader(
+ new GZIPInputStream(new FileInputStream(filename))));
+ try {
+ String line = r.readLine();
+ assertEquals("Got invalid line back from " + filename, msg, line);
+ } finally {
+ r.close();
+ new File(filename).delete();
+ }
+ }
+
+ public void testGzipCodecWrite() throws IOException {
+ // Create a gzipped file using a compressor from the CodecPool,
+ // and try to read it back via the regular GZIPInputStream.
+
+ // Don't use native libs for this test.
+ Configuration conf = new Configuration();
+ conf.setBoolean("hadoop.native.lib", false);
+ assertFalse("ZlibFactory is using native libs against request",
+ ZlibFactory.isNativeZlibLoaded(conf));
+
+ // Ensure that the CodecPool has a BuiltInZlibDeflater in it.
+ Compressor zlibCompressor = ZlibFactory.getZlibCompressor(conf);
+ assertNotNull("zlibCompressor is null!", zlibCompressor);
+ assertTrue("ZlibFactory returned unexpected deflator",
+ zlibCompressor instanceof BuiltInZlibDeflater);
+ CodecPool.returnCompressor(zlibCompressor);
+
+ // Create a GZIP text file via the Compressor interface.
+ CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
+ CompressionCodec codec = ccf.getCodec(new Path("foo.gz"));
+ assertTrue("Codec for .gz file is not GzipCodec", codec instanceof GzipCodec);
+
+ final String msg = "This is the message we are going to compress.";
+ final String tmpDir = System.getProperty("test.build.data", "/tmp/");
+ final String fileName = new Path(new Path(tmpDir),
+ "testGzipCodecWrite.txt.gz").toString();
+
+ BufferedWriter w = null;
+ Compressor gzipCompressor = CodecPool.getCompressor(codec);
+ if (null != gzipCompressor) {
+ // If it gives us back a Compressor, we should be able to use this
+ // to write files we can then read back with Java's gzip tools.
+ OutputStream os = new CompressorStream(new FileOutputStream(fileName),
+ gzipCompressor);
+ w = new BufferedWriter(new OutputStreamWriter(os));
+ w.write(msg);
+ w.close();
+ CodecPool.returnCompressor(gzipCompressor);
+
+ verifyGzipFile(fileName, msg);
+ }
+
+ // Create a gzip text file via codec.createOutputStream().
+ w = new BufferedWriter(new OutputStreamWriter(
+ codec.createOutputStream(new FileOutputStream(fileName))));
+ w.write(msg);
+ w.close();
+
+ verifyGzipFile(fileName, msg);
+ }
}