Author: cutting
Date: Fri Jun 15 18:25:38 2012
New Revision: 1350726
URL: http://svn.apache.org/viewvc?rev=1350726&view=rev
Log:
AVRO-1112. Java: Add support for Snappy codec to newer mapreduce API.
Contributed by Matt Mead.
Modified:
avro/trunk/CHANGES.txt
avro/trunk/lang/java/mapred/src/main/java/org/apache/avro/mapreduce/AvroJob.java
avro/trunk/lang/java/mapred/src/main/java/org/apache/avro/mapreduce/AvroOutputFormatBase.java
avro/trunk/lang/java/mapred/src/test/java/org/apache/avro/mapreduce/TestAvroKeyOutputFormat.java
Modified: avro/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=1350726&r1=1350725&r2=1350726&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Fri Jun 15 18:25:38 2012
@@ -7,6 +7,9 @@ Avro 1.7.1 (unreleased)
AVRO-1106. Java: Add AvroMultipleOutputs for newer mapreduce API.
(Ashish Nagavaram via cutting)
+ AVRO-1112. Java: Add support for Snappy codec to newer mapreduce API.
+ (Matt Mead via cutting)
+
IMPROVEMENTS
BUG FIXES
Modified:
avro/trunk/lang/java/mapred/src/main/java/org/apache/avro/mapreduce/AvroJob.java
URL:
http://svn.apache.org/viewvc/avro/trunk/lang/java/mapred/src/main/java/org/apache/avro/mapreduce/AvroJob.java?rev=1350726&r1=1350725&r2=1350726&view=diff
==============================================================================
---
avro/trunk/lang/java/mapred/src/main/java/org/apache/avro/mapreduce/AvroJob.java
(original)
+++
avro/trunk/lang/java/mapred/src/main/java/org/apache/avro/mapreduce/AvroJob.java
Fri Jun 15 18:25:38 2012
@@ -57,6 +57,10 @@ public final class AvroJob {
/** Configuration key for the output value schema. */
private static final String CONF_OUTPUT_VALUE_SCHEMA =
"avro.schema.output.value";
+ /** The configuration key for a job's output compression codec.
+ * This takes one of the strings registered in {@link org.apache.avro.file.CodecFactory} */
+ public static final String CONF_OUTPUT_CODEC = "avro.output.codec";
+
/**
* Sets the job input key schema.
*
Modified:
avro/trunk/lang/java/mapred/src/main/java/org/apache/avro/mapreduce/AvroOutputFormatBase.java
URL:
http://svn.apache.org/viewvc/avro/trunk/lang/java/mapred/src/main/java/org/apache/avro/mapreduce/AvroOutputFormatBase.java?rev=1350726&r1=1350725&r2=1350726&view=diff
==============================================================================
---
avro/trunk/lang/java/mapred/src/main/java/org/apache/avro/mapreduce/AvroOutputFormatBase.java
(original)
+++
avro/trunk/lang/java/mapred/src/main/java/org/apache/avro/mapreduce/AvroOutputFormatBase.java
Fri Jun 15 18:25:38 2012
@@ -23,6 +23,7 @@ import java.io.OutputStream;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.avro.file.CodecFactory;
+import org.apache.avro.file.DataFileConstants;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
@@ -42,11 +43,17 @@ public abstract class AvroOutputFormatBa
*/
protected static CodecFactory getCompressionCodec(TaskAttemptContext
context) {
if (FileOutputFormat.getCompressOutput(context)) {
- // Deflate compression.
- int compressionLevel = context.getConfiguration().getInt(
- org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY,
- org.apache.avro.mapred.AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
- return CodecFactory.deflateCodec(compressionLevel);
+ // Default to deflate compression.
+ String outputCodec = context.getConfiguration()
+ .get(AvroJob.CONF_OUTPUT_CODEC, DataFileConstants.DEFLATE_CODEC);
+ if (DataFileConstants.DEFLATE_CODEC.equals(outputCodec)) {
+ int compressionLevel = context.getConfiguration().getInt(
+ org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY,
+ org.apache.avro.mapred.AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
+ return CodecFactory.deflateCodec(compressionLevel);
+ } else {
+ return CodecFactory.fromString(outputCodec);
+ }
}
// No compression.
Modified:
avro/trunk/lang/java/mapred/src/test/java/org/apache/avro/mapreduce/TestAvroKeyOutputFormat.java
URL:
http://svn.apache.org/viewvc/avro/trunk/lang/java/mapred/src/test/java/org/apache/avro/mapreduce/TestAvroKeyOutputFormat.java?rev=1350726&r1=1350725&r2=1350726&view=diff
==============================================================================
---
avro/trunk/lang/java/mapred/src/test/java/org/apache/avro/mapreduce/TestAvroKeyOutputFormat.java
(original)
+++
avro/trunk/lang/java/mapred/src/test/java/org/apache/avro/mapreduce/TestAvroKeyOutputFormat.java
Fri Jun 15 18:25:38 2012
@@ -26,6 +26,7 @@ import java.io.OutputStream;
import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
+import org.apache.avro.file.DataFileConstants;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapred.AvroOutputFormat;
import org.apache.hadoop.conf.Configuration;
@@ -58,6 +59,14 @@ public class TestAvroKeyOutputFormat {
testGetRecordWriter(conf, CodecFactory.deflateCodec(3));
}
+ @Test
+ public void testWithSnappyCode() throws IOException {
+ Configuration conf = new Configuration();
+ conf.setBoolean("mapred.output.compress", true);
+ conf.set(AvroJob.CONF_OUTPUT_CODEC, DataFileConstants.SNAPPY_CODEC);
+ testGetRecordWriter(conf, CodecFactory.snappyCodec());
+ }
+
/**
* Tests that the record writer is constructed and returned correctly from
the output format.
*/