Repository: spark Updated Branches: refs/heads/master c235b83e2 -> 676f98289
[SPARK-2953] Allow using short names for io compression codecs Instead of requiring "org.apache.spark.io.LZ4CompressionCodec", it is easier for users if Spark just accepts "lz4", "lzf", "snappy". Author: Reynold Xin <[email protected]> Closes #1873 from rxin/compressionCodecShortForm and squashes the following commits: 9f50962 [Reynold Xin] Specify short-form compression codec names first. 63f78ee [Reynold Xin] Updated configuration documentation. 47b3848 [Reynold Xin] [SPARK-2953] Allow using short names for io compression codecs Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/676f9828 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/676f9828 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/676f9828 Branch: refs/heads/master Commit: 676f98289dad61c091bb45bd35a2b9613b22d64a Parents: c235b83 Author: Reynold Xin <[email protected]> Authored: Tue Aug 12 22:50:29 2014 -0700 Committer: Reynold Xin <[email protected]> Committed: Tue Aug 12 22:50:29 2014 -0700 ---------------------------------------------------------------------- .../org/apache/spark/io/CompressionCodec.scala | 11 +++++++++-- .../apache/spark/io/CompressionCodecSuite.scala | 18 ++++++++++++++++++ docs/configuration.md | 8 +++++--- 3 files changed, 32 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/676f9828/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala index 1b66218..ef9c43e 100644 --- a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala +++ b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala @@ -46,17 +46,24 @@ trait CompressionCodec { private[spark] object CompressionCodec { + + private val shortCompressionCodecNames = Map( + "lz4" -> classOf[LZ4CompressionCodec].getName, + "lzf" -> classOf[LZFCompressionCodec].getName, + "snappy" -> classOf[SnappyCompressionCodec].getName) + def createCodec(conf: SparkConf): CompressionCodec = { createCodec(conf, conf.get("spark.io.compression.codec", DEFAULT_COMPRESSION_CODEC)) } def createCodec(conf: SparkConf, codecName: String): CompressionCodec = { - val ctor = Class.forName(codecName, true, Utils.getContextOrSparkClassLoader) + val codecClass = shortCompressionCodecNames.getOrElse(codecName.toLowerCase, codecName) + val ctor = Class.forName(codecClass, true, Utils.getContextOrSparkClassLoader) .getConstructor(classOf[SparkConf]) ctor.newInstance(conf).asInstanceOf[CompressionCodec] } - val DEFAULT_COMPRESSION_CODEC = classOf[SnappyCompressionCodec].getName + val DEFAULT_COMPRESSION_CODEC = "snappy" } http://git-wip-us.apache.org/repos/asf/spark/blob/676f9828/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala ---------------------------------------------------------------------- diff --git a/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala b/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala index 3f882a7..25be7f2 100644 --- a/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala +++ b/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala @@ -56,15 +56,33 @@ class CompressionCodecSuite extends FunSuite { testCodec(codec) } + test("lz4 compression codec short form") { + val codec = CompressionCodec.createCodec(conf, "lz4") + assert(codec.getClass === classOf[LZ4CompressionCodec]) + testCodec(codec) + } + test("lzf compression codec") { val codec = CompressionCodec.createCodec(conf, classOf[LZFCompressionCodec].getName) assert(codec.getClass === classOf[LZFCompressionCodec]) testCodec(codec) } + test("lzf compression codec short form") { + val codec = CompressionCodec.createCodec(conf, "lzf") + assert(codec.getClass === classOf[LZFCompressionCodec]) + testCodec(codec) + } + test("snappy compression codec") { val codec = CompressionCodec.createCodec(conf, classOf[SnappyCompressionCodec].getName) assert(codec.getClass === classOf[SnappyCompressionCodec]) testCodec(codec) } + + test("snappy compression codec short form") { + val codec = CompressionCodec.createCodec(conf, "snappy") + assert(codec.getClass === classOf[SnappyCompressionCodec]) + testCodec(codec) + } } http://git-wip-us.apache.org/repos/asf/spark/blob/676f9828/docs/configuration.md ---------------------------------------------------------------------- diff --git a/docs/configuration.md b/docs/configuration.md index 617a72a..8136bd6 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -373,10 +373,12 @@ Apart from these, the following properties are also available, and may be useful </tr> <tr> <td><code>spark.io.compression.codec</code></td> - <td>org.apache.spark.io.<br />SnappyCompressionCodec</td> + <td>snappy</td> <td> - The codec used to compress internal data such as RDD partitions and shuffle outputs. - By default, Spark provides three codecs: <code>org.apache.spark.io.LZ4CompressionCodec</code>, + The codec used to compress internal data such as RDD partitions and shuffle outputs. By default, + Spark provides three codecs: <code>lz4</code>, <code>lzf</code>, and <code>snappy</code>. You + can also use fully qualified class names to specify the codec, e.g. + <code>org.apache.spark.io.LZ4CompressionCodec</code>, <code>org.apache.spark.io.LZFCompressionCodec</code>, and <code>org.apache.spark.io.SnappyCompressionCodec</code>. </td> --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
