Github user viirya commented on a diff in the pull request:
https://github.com/apache/spark/pull/19882#discussion_r154824305
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala ---
@@ -614,11 +531,63 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest {
}
}
- test("read from multiple orc input paths") {
- val path1 = Utils.createTempDir()
- val path2 = Utils.createTempDir()
- makeOrcFile((1 to 10).map(Tuple1.apply), path1)
- makeOrcFile((1 to 10).map(Tuple1.apply), path2)
- assertResult(20)(read.orc(path1.getCanonicalPath, path2.getCanonicalPath).count())
- }
+ test("read from multiple orc input paths") {
+ val path1 = Utils.createTempDir()
+ val path2 = Utils.createTempDir()
+ makeOrcFile((1 to 10).map(Tuple1.apply), path1)
+ makeOrcFile((1 to 10).map(Tuple1.apply), path2)
+ val df = spark.read.format(format).load(path1.getCanonicalPath, path2.getCanonicalPath)
+ assert(df.count() == 20)
+ }
+}
+
+class OrcQuerySuite extends OrcQueryTest with SharedSQLContext {
+ import testImplicits._
+
+ test("LZO compression options for writing to an ORC file") {
+ withTempPath { file =>
+ spark.range(0, 10).write
+ .option("compression", "LZO")
+ .format(format)
+ .save(file.getCanonicalPath)
+
+ val maybeOrcFile = file.listFiles().find(_.getName.endsWith(".lzo.orc"))
+ assert(maybeOrcFile.isDefined)
+
+ val orcFilePath = new Path(maybeOrcFile.get.getAbsolutePath)
+ val conf = OrcFile.readerOptions(new Configuration())
+ assert("LZO" === OrcFile.createReader(orcFilePath,
conf).getCompressionKind.name)
+ }
+ }
+
+ test("Schema discovery on empty ORC files") {
+ // SPARK-8501 is fixed.
+ withTempPath { dir =>
+ val path = dir.getCanonicalPath
+
+ withTable("empty_orc") {
+ withTempView("empty", "single") {
+ spark.sql(
+ s"""CREATE TABLE empty_orc(key INT, value STRING)
+ |USING $format
+ |LOCATION '${dir.toURI}'
+ """.stripMargin)
+
+ val emptyDF = Seq.empty[(Int, String)].toDF("key",
"value").coalesce(1)
+ emptyDF.createOrReplaceTempView("empty")
+
+ // This creates 1 empty ORC file with the Hive ORC SerDe. We are using this trick because
+ // the Spark SQL ORC data source always avoids writing empty ORC files.
--- End diff ---
Is this still using Hive ORC SerDe?
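
For context, here is a minimal, standalone sketch (not part of the diff) of how the writer implementation could be pinned down explicitly, assuming the `spark.sql.orc.impl` session config from this PR series (values "native" and "hive"); the object name and output path are hypothetical:

    import org.apache.spark.sql.SparkSession

    object OrcImplSketch {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder()
          .appName("orc-impl-sketch")
          .master("local[1]")
          // "native" selects the new reader/writer under
          // org.apache.spark.sql.execution.datasources.orc; "hive" keeps the
          // Hive ORC SerDe path. (Config name as assumed from this PR series.)
          .config("spark.sql.orc.impl", "native")
          .getOrCreate()
        import spark.implicits._

        // With the native implementation, writing an empty DataFrame is expected
        // to produce no ORC data files at all, which is why the test above needs
        // the Hive SerDe trick to materialize an empty ORC file.
        val emptyDF = Seq.empty[(Int, String)].toDF("key", "value").coalesce(1)
        emptyDF.write.mode("overwrite").orc("/tmp/orc-impl-sketch") // hypothetical path

        spark.stop()
      }
    }

If the `CREATE TABLE ... USING $format` path above goes through the native data source rather than the Hive SerDe, the comment in the test would be stale.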
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]