Github user fjh100456 commented on a diff in the pull request:
https://github.com/apache/spark/pull/19218#discussion_r143624224
--- Diff:
sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/SaveAsHiveFile.scala
---
@@ -68,6 +68,26 @@ private[hive] trait SaveAsHiveFile extends
DataWritingCommand {
.get("mapreduce.output.fileoutputformat.compress.type"))
}
+ fileSinkConf.tableInfo.getOutputFileFormatClassName match {
+ case formatName if formatName.endsWith("ParquetOutputFormat") =>
+ val compressionConf = "parquet.compression"
+ val compressionCodec = getCompressionByPriority(fileSinkConf,
compressionConf,
+ sparkSession.sessionState.conf.parquetCompressionCodec) match {
+ case "NONE" => "UNCOMPRESSED"
+ case _@x => x
+ }
+ hadoopConf.set(compressionConf, compressionCodec)
+ case formatName if formatName.endsWith("OrcOutputFormat") =>
+ val compressionConf = "orc.compress"
+ val compressionCodec = getCompressionByPriority(fileSinkConf,
compressionConf,
+ sparkSession.sessionState.conf.orcCompressionCodec) match {
+ case "UNCOMPRESSED" => "NONE"
--- End diff --
Yes, they are different. Both the parameter names and the parameter values
follow different conventions; this is presumably an inconsistency on the
Parquet and ORC side.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]