Github user HyukjinKwon commented on a diff in the pull request:
https://github.com/apache/spark/pull/19882#discussion_r155394617
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala ---
@@ -614,27 +523,88 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest {
}
}
- test("read from multiple orc input paths") {
- val path1 = Utils.createTempDir()
- val path2 = Utils.createTempDir()
- makeOrcFile((1 to 10).map(Tuple1.apply), path1)
- makeOrcFile((1 to 10).map(Tuple1.apply), path2)
- assertResult(20)(read.orc(path1.getCanonicalPath, path2.getCanonicalPath).count())
- }
+ test("read from multiple orc input paths") {
+ val path1 = Utils.createTempDir()
+ val path2 = Utils.createTempDir()
+ makeOrcFile((1 to 10).map(Tuple1.apply), path1)
+ makeOrcFile((1 to 10).map(Tuple1.apply), path2)
+ val df = spark.read.orc(path1.getCanonicalPath, path2.getCanonicalPath)
+ assert(df.count() == 20)
+ }
+}
+
+class OrcQuerySuite extends OrcQueryTest with SharedSQLContext {
+ import testImplicits._
+
+ test("LZO compression options for writing to an ORC file") {
+ withTempPath { file =>
+ spark.range(0, 10).write
+ .option("compression", "LZO")
+ .orc(file.getCanonicalPath)
+
+ val maybeOrcFile = file.listFiles().find(_.getName.endsWith(".lzo.orc"))
+ assert(maybeOrcFile.isDefined)
+
+ val orcFilePath = new Path(maybeOrcFile.get.getAbsolutePath)
+ val conf = OrcFile.readerOptions(new Configuration())
+ assert("LZO" === OrcFile.createReader(orcFilePath,
conf).getCompressionKind.name)
+ }
+ }
+
+ test("Schema discovery on empty ORC files") {
+ // SPARK-8501 is fixed.
+ withTempPath { dir =>
+ val path = dir.getCanonicalPath
+
+ withTable("empty_orc") {
+ withTempView("empty", "single") {
+ spark.sql(
+ s"""CREATE TABLE empty_orc(key INT, value STRING)
+ |USING ORC
+ |LOCATION '${dir.toURI}'
+ """.stripMargin)
+
+ val emptyDF = Seq.empty[(Int, String)].toDF("key",
"value").coalesce(1)
+ emptyDF.createOrReplaceTempView("empty")
+
+ // This creates 1 empty ORC file with ORC SerDe. We are using this trick because
+ // the Spark SQL ORC data source always avoids writing empty ORC files.
+ spark.sql(
+ s"""INSERT INTO TABLE empty_orc
+ |SELECT key, value FROM empty
+ """.stripMargin)
+
+ val df = spark.read.orc(path)
+ assert(df.schema === emptyDF.schema.asNullable)
+ checkAnswer(df, emptyDF)
+ }
+ }
+ }
+ }
+
+ test("SPARK-21791 ORC should support column names with dot") {
--- End diff ---
Oh, I overlooked. Sure, that's fine.