This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new f378b506bf1 [SPARK-45470][SQL] Avoid paste string value of hive orc
compression kind
f378b506bf1 is described below
commit f378b506bf1fc116e5dc4786d786e50d4a56574a
Author: Jiaan Geng <[email protected]>
AuthorDate: Mon Oct 9 23:04:15 2023 -0700
[SPARK-45470][SQL] Avoid paste string value of hive orc compression kind
### What changes were proposed in this pull request?
Currently, Hive supports the ORC format with several compression codecs (please
refer to
[ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java](https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java)).
Spark pasted many string literals for these compression codecs. This makes it
easy to introduce mistakes and reduces development efficiency.
### Why are the changes needed?
Avoid pasting string values of the Hive ORC compression kind.
### Does this PR introduce _any_ user-facing change?
'No'.
Just update inner implementation.
### How was this patch tested?
Existing test cases.
### Was this patch authored or co-authored using generative AI tooling?
'No'.
Closes #43296 from beliefer/SPARK-45470.
Authored-by: Jiaan Geng <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../org/apache/spark/sql/hive/CompressionCodecSuite.scala | 14 ++++++++++----
.../spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala | 5 +++--
.../org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala | 4 +++-
3 files changed, 16 insertions(+), 7 deletions(-)
diff --git
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala
index 6669fbdfbde..a5d11f6e0e1 100644
---
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala
+++
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala
@@ -23,6 +23,7 @@ import java.util.Locale
import scala.jdk.CollectionConverters._
import org.apache.hadoop.fs.Path
+import org.apache.hadoop.hive.ql.io.orc.CompressionKind
import org.apache.orc.OrcConf.COMPRESS
import org.apache.parquet.hadoop.ParquetOutputFormat
import org.scalatest.BeforeAndAfterAll
@@ -291,8 +292,10 @@ class CompressionCodecSuite extends TestHiveSingleton with
ParquetTest with Befo
tableCompressCodecs = List("UNCOMPRESSED", "SNAPPY", "GZIP"),
sessionCompressCodecs = List("SNAPPY", "GZIP", "SNAPPY"))
checkForTableWithCompressProp("orc",
- tableCompressCodecs = List("NONE", "SNAPPY", "ZLIB"),
- sessionCompressCodecs = List("SNAPPY", "ZLIB", "SNAPPY"))
+ tableCompressCodecs =
+ List(CompressionKind.NONE.name, CompressionKind.SNAPPY.name,
CompressionKind.ZLIB.name),
+ sessionCompressCodecs =
+ List(CompressionKind.SNAPPY.name, CompressionKind.ZLIB.name,
CompressionKind.SNAPPY.name))
}
test("table-level compression is not set but session-level compressions is
set ") {
@@ -301,7 +304,8 @@ class CompressionCodecSuite extends TestHiveSingleton with
ParquetTest with Befo
sessionCompressCodecs = List("UNCOMPRESSED", "SNAPPY", "GZIP"))
checkForTableWithCompressProp("orc",
tableCompressCodecs = List.empty,
- sessionCompressCodecs = List("NONE", "SNAPPY", "ZLIB"))
+ sessionCompressCodecs =
+ List(CompressionKind.NONE.name, CompressionKind.SNAPPY.name,
CompressionKind.ZLIB.name))
}
def checkTableWriteWithCompressionCodecs(format: String, compressCodecs:
List[String]): Unit = {
@@ -336,6 +340,8 @@ class CompressionCodecSuite extends TestHiveSingleton with
ParquetTest with Befo
test("test table containing mixed compression codec") {
checkTableWriteWithCompressionCodecs("parquet", List("UNCOMPRESSED",
"SNAPPY", "GZIP"))
- checkTableWriteWithCompressionCodecs("orc", List("NONE", "SNAPPY", "ZLIB"))
+ checkTableWriteWithCompressionCodecs(
+ "orc",
+ List(CompressionKind.NONE.name, CompressionKind.SNAPPY.name,
CompressionKind.ZLIB.name))
}
}
diff --git
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala
index 3b82a6c458c..e9b6bd28823 100644
---
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala
+++
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.hive.orc
import java.io.File
import org.apache.hadoop.fs.Path
+import org.apache.hadoop.hive.ql.io.orc.CompressionKind
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.catalog.CatalogUtils
@@ -98,7 +99,7 @@ class OrcHadoopFsRelationSuite extends HadoopFsRelationTest {
val orcFilePath = maybeOrcFile.get.toPath.toString
val expectedCompressionKind =
OrcFileOperator.getFileReader(orcFilePath).get.getCompression
- assert("ZLIB" === expectedCompressionKind.name())
+ assert(CompressionKind.ZLIB.name() === expectedCompressionKind.name())
val copyDf = spark
.read
@@ -113,7 +114,7 @@ class OrcHadoopFsRelationSuite extends HadoopFsRelationTest
{
.orc(file.getCanonicalPath)
val expectedCompressionKind =
OrcFileOperator.getFileReader(file.getCanonicalPath).get.getCompression
- assert("SNAPPY" === expectedCompressionKind.name())
+ assert(CompressionKind.SNAPPY.name() === expectedCompressionKind.name())
}
}
}
diff --git
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala
index 9ee9ebc2282..43bcee5348a 100644
---
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala
+++
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala
@@ -21,6 +21,8 @@ import java.io.File
import scala.util.Random
+import org.apache.hadoop.hive.ql.io.orc.CompressionKind
+
import org.apache.spark.SparkConf
import org.apache.spark.benchmark.Benchmark
import org.apache.spark.sql.{DataFrame, SparkSession}
@@ -46,7 +48,7 @@ object OrcReadBenchmark extends SqlBasedBenchmark {
override def getSparkSession: SparkSession = {
val conf = new SparkConf()
- conf.set("orc.compression", "snappy")
+ conf.set("orc.compression", CompressionKind.SNAPPY.name())
val sparkSession = SparkSession.builder()
.master("local[1]")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]