Github user viirya commented on a diff in the pull request:
https://github.com/apache/spark/pull/19882#discussion_r154824305
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala ---
@@ -614,11 +531,63 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest {
}
}
- test("read from multiple orc input paths") {
- val path1 = Utils.createTempDir()
- val path2 = Utils.createTempDir()
- makeOrcFile((1 to 10).map(Tuple1.apply), path1)
- makeOrcFile((1 to 10).map(Tuple1.apply), path2)
- assertResult(20)(read.orc(path1.getCanonicalPath, path2.getCanonicalPath).count())
- }
+ test("read from multiple orc input paths") {
+ val path1 = Utils.createTempDir()
+ val path2 = Utils.createTempDir()
+ makeOrcFile((1 to 10).map(Tuple1.apply), path1)
+ makeOrcFile((1 to 10).map(Tuple1.apply), path2)
+ val df = spark.read.format(format).load(path1.getCanonicalPath, path2.getCanonicalPath)
+ assert(df.count() == 20)
+ }
+}
+
+class OrcQuerySuite extends OrcQueryTest with SharedSQLContext {
+ import testImplicits._
+
+ test("LZO compression options for writing to an ORC file") {
+ withTempPath { file =>
+ spark.range(0, 10).write
+ .option("compression", "LZO")
+ .format(format)
+ .save(file.getCanonicalPath)
+
+ val maybeOrcFile = file.listFiles().find(_.getName.endsWith(".lzo.orc"))
+ assert(maybeOrcFile.isDefined)
+
+ val orcFilePath = new Path(maybeOrcFile.get.getAbsolutePath)
+ val conf = OrcFile.readerOptions(new Configuration())
+ assert("LZO" === OrcFile.createReader(orcFilePath,
conf).getCompressionKind.name)
+ }
+ }
+
+ test("Schema discovery on empty ORC files") {
+ // SPARK-8501 is fixed.
+ withTempPath { dir =>
+ val path = dir.getCanonicalPath
+
+ withTable("empty_orc") {
+ withTempView("empty", "single") {
+ spark.sql(
+ s"""CREATE TABLE empty_orc(key INT, value STRING)
+ |USING $format
+ |LOCATION '${dir.toURI}'
+ """.stripMargin)
+
+ val emptyDF = Seq.empty[(Int, String)].toDF("key",
"value").coalesce(1)
+ emptyDF.createOrReplaceTempView("empty")
+
+ // This creates 1 empty ORC file with the Hive ORC SerDe. We are using this trick because
+ // the Spark SQL ORC data source always avoids writing empty ORC files.
--- End diff ---
Is this still using Hive ORC SerDe?
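
For context, here is a minimal, standalone sketch (not part of the diff) of how the writer implementation could be pinned down explicitly, assuming the `spark.sql.orc.impl` session config from this PR series (values "native" and "hive"); the object name and output path are hypothetical:

    import org.apache.spark.sql.SparkSession

    object OrcImplSketch {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder()
          .appName("orc-impl-sketch")
          .master("local[1]")
          // "native" selects the new reader/writer under
          // org.apache.spark.sql.execution.datasources.orc; "hive" keeps the
          // Hive ORC SerDe path. (Config name as assumed from this PR series.)
          .config("spark.sql.orc.impl", "native")
          .getOrCreate()
        import spark.implicits._

        // With the native implementation, writing an empty DataFrame is expected
        // to produce no ORC data files at all, which is why the test above needs
        // the Hive SerDe trick to materialize an empty ORC file.
        val emptyDF = Seq.empty[(Int, String)].toDF("key", "value").coalesce(1)
        emptyDF.write.mode("overwrite").orc("/tmp/orc-impl-sketch") // hypothetical path

        spark.stop()
      }
    }

If the `CREATE TABLE ... USING $format` path above goes through the native data source rather than the Hive SerDe, the comment in the test would be stale.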
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]