Github user gatorsmile commented on a diff in the pull request:
https://github.com/apache/spark/pull/19218#discussion_r158443604
--- Diff:
sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveOptions.scala
---
@@ -102,4 +111,18 @@ object HiveOptions {
"collectionDelim" -> "colelction.delim",
"mapkeyDelim" -> "mapkey.delim",
"lineDelim" -> "line.delim").map { case (k, v) =>
k.toLowerCase(Locale.ROOT) -> v }
+
+ def getHiveWriteCompression(tableInfo: TableDesc, sqlConf: SQLConf):
Option[(String, String)] = {
+ tableInfo.getOutputFileFormatClassName.toLowerCase match {
+ case formatName if formatName.endsWith("parquetoutputformat") =>
+ val compressionCodec = new
ParquetOptions(tableInfo.getProperties.asScala.toMap,
+ sqlConf).compressionCodecClassName
+ Option((ParquetOutputFormat.COMPRESSION, compressionCodec))
+ case formatName if formatName.endsWith("orcoutputformat") =>
+ val compressionCodec = new
OrcOptions(tableInfo.getProperties.asScala.toMap,
+ sqlConf).compressionCodec
--- End diff --
Also update `OrcOptions`'s `compressionCodec` to
`compressionCodecClassName`.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]