Github user MaxGekk commented on a diff in the pull request:
https://github.com/apache/spark/pull/21902#discussion_r206064935
--- Diff:
external/avro/src/main/scala/org/apache/spark/sql/avro/AvroFileFormat.scala ---
@@ -117,28 +117,19 @@ private[avro] class AvroFileFormat extends FileFormat
with DataSourceRegister {
dataSchema, nullable = false, parsedOptions.recordName,
parsedOptions.recordNamespace)
AvroJob.setOutputKeySchema(job, outputAvroSchema)
- val COMPRESS_KEY = "mapred.output.compress"
- parsedOptions.compression match {
- case "uncompressed" =>
- log.info("writing uncompressed Avro records")
- job.getConfiguration.setBoolean(COMPRESS_KEY, false)
-
- case "snappy" =>
- log.info("compressing Avro output using Snappy")
- job.getConfiguration.setBoolean(COMPRESS_KEY, true)
- job.getConfiguration.set(AvroJob.CONF_OUTPUT_CODEC,
DataFileConstants.SNAPPY_CODEC)
-
- case "deflate" =>
+ val (compression, levelInfo) = parsedOptions.compression match {
+ case "uncompressed" => (NULL_CODEC, "")
+ case codec @ (SNAPPY_CODEC | BZIP2_CODEC | XZ_CODEC) => (codec, "")
+ case DEFLATE_CODEC =>
val deflateLevel = spark.sessionState.conf.avroDeflateLevel
- log.info(s"compressing Avro output using deflate
(level=$deflateLevel)")
- job.getConfiguration.setBoolean(COMPRESS_KEY, true)
- job.getConfiguration.set(AvroJob.CONF_OUTPUT_CODEC,
DataFileConstants.DEFLATE_CODEC)
job.getConfiguration.setInt(AvroOutputFormat.DEFLATE_LEVEL_KEY,
deflateLevel)
-
- case unknown: String =>
- log.error(s"unsupported compression codec $unknown")
+ (DEFLATE_CODEC, s" (level = $deflateLevel)")
--- End diff --
The conversion of `-1` to `6` is performed somewhere inside `zlib`. Do
you want to hard-code `6` in the log message?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]