JoshRosen commented on a change in pull request #24905: [SPARK-28102] Add
configuration for selecting LZ4 implementation (safe, unsafe, JNI)
URL: https://github.com/apache/spark/pull/24905#discussion_r295105030
##########
File path: core/src/main/scala/org/apache/spark/io/CompressionCodec.scala
##########
@@ -118,14 +119,46 @@ private[spark] object CompressionCodec {
@DeveloperApi
class LZ4CompressionCodec(conf: SparkConf) extends CompressionCodec {
+ private def lz4Factory: LZ4Factory = {
+ conf.get(IO_COMPRESSION_LZ4_FACTORY) match {
+ case "fastestInstance" => LZ4Factory.fastestInstance()
+ case "fastestJavaInstance" => LZ4Factory.fastestJavaInstance()
+ case "nativeInstance" => LZ4Factory.nativeInstance()
+ case "safeInstance" => LZ4Factory.safeInstance()
+ case "unsafeInstance" => LZ4Factory.unsafeInstance()
+ }
+ }
+
+ private def xxHashFactory: XXHashFactory = {
+ conf.get(IO_COMPRESSION_LZ4_FACTORY) match {
+ case "fastestInstance" => XXHashFactory.fastestInstance()
+ case "fastestJavaInstance" => XXHashFactory.fastestJavaInstance()
+ case "nativeInstance" => XXHashFactory.nativeInstance()
+ case "safeInstance" => XXHashFactory.safeInstance()
+ case "unsafeInstance" => XXHashFactory.unsafeInstance()
+ }
+ }
+
+ private def defaultSeed: Int = 0x9747b28c // LZ4BlockOutputStream.DEFAULT_SEED
+
override def compressedOutputStream(s: OutputStream): OutputStream = {
val blockSize = conf.get(IO_COMPRESSION_LZ4_BLOCKSIZE).toInt
- new LZ4BlockOutputStream(s, blockSize)
Review comment:
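The removed `new LZ4BlockOutputStream(s, blockSize)` call invokes the two-argument constructor at
https://github.com/lz4/lz4-java/blob/1.6.0/src/java/net/jpountz/lz4/LZ4BlockOutputStream.java#L137,
which, in turn, delegates to the three-argument constructor at
https://github.com/lz4/lz4-java/blob/1.6.0/src/java/net/jpountz/lz4/LZ4BlockOutputStream.java#L123: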
```java
public LZ4BlockOutputStream(OutputStream out, int blockSize, LZ4Compressor compressor) {
  this(out, blockSize, compressor,
      XXHashFactory.fastestInstance().newStreamingHash32(DEFAULT_SEED).asChecksum(),
      false);
}
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]