Github user dongjoon-hyun commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19882#discussion_r155390570
  
    --- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala ---
    @@ -614,27 +523,88 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest {
         }
       }
     
    -   test("read from multiple orc input paths") {
    -     val path1 = Utils.createTempDir()
    -     val path2 = Utils.createTempDir()
    -     makeOrcFile((1 to 10).map(Tuple1.apply), path1)
    -     makeOrcFile((1 to 10).map(Tuple1.apply), path2)
    -     assertResult(20)(read.orc(path1.getCanonicalPath, path2.getCanonicalPath).count())
    -   }
    +  test("read from multiple orc input paths") {
    +    val path1 = Utils.createTempDir()
    +    val path2 = Utils.createTempDir()
    +    makeOrcFile((1 to 10).map(Tuple1.apply), path1)
    +    makeOrcFile((1 to 10).map(Tuple1.apply), path2)
    +    val df = spark.read.orc(path1.getCanonicalPath, path2.getCanonicalPath)
    +    assert(df.count() == 20)
    +  }
    +}
    +
    +class OrcQuerySuite extends OrcQueryTest with SharedSQLContext {
    +  import testImplicits._
    +
    +  test("LZO compression options for writing to an ORC file") {
    +    withTempPath { file =>
    +      spark.range(0, 10).write
    +        .option("compression", "LZO")
    +        .orc(file.getCanonicalPath)
    +
    +      val maybeOrcFile = file.listFiles().find(_.getName.endsWith(".lzo.orc"))
    +      assert(maybeOrcFile.isDefined)
    +
    +      val orcFilePath = new Path(maybeOrcFile.get.getAbsolutePath)
    +      val conf = OrcFile.readerOptions(new Configuration())
    +      assert("LZO" === OrcFile.createReader(orcFilePath, 
conf).getCompressionKind.name)
    +    }
    +  }
    +
    +  test("Schema discovery on empty ORC files") {
    +    // SPARK-8501 is fixed.
    +    withTempPath { dir =>
    +      val path = dir.getCanonicalPath
    +
    +      withTable("empty_orc") {
    +        withTempView("empty", "single") {
    +          spark.sql(
    +            s"""CREATE TABLE empty_orc(key INT, value STRING)
    +               |USING ORC
    +               |LOCATION '${dir.toURI}'
    +             """.stripMargin)
    +
    +          val emptyDF = Seq.empty[(Int, String)].toDF("key", "value").coalesce(1)
    +          emptyDF.createOrReplaceTempView("empty")
    +
    +          // This creates one empty ORC file with the ORC SerDe. We use this trick because
    +          // the Spark SQL ORC data source always avoids writing empty ORC files.
    +          spark.sql(
    +            s"""INSERT INTO TABLE empty_orc
    +               |SELECT key, value FROM empty
    +             """.stripMargin)
    +
    +          val df = spark.read.orc(path)
    +          assert(df.schema === emptyDF.schema.asNullable)
    +          checkAnswer(df, emptyDF)
    +        }
    +      }
    +    }
    +  }
    +
    +  test("SPARK-21791 ORC should support column names with dot") {
    --- End diff --
    
    The old OrcFileFormat fails on this test case.
    Do you mean adding a test case that catches the exception instead?
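    If that is the idea, a minimal sketch might look like the following. This is only an illustration: the exact failure point (write or read) and the concrete exception class thrown by the old OrcFileFormat are assumptions that would need to be confirmed before pinning them down in the test.
    
    ```scala
    // Hypothetical sketch only: exercises a column name containing a dot and expects
    // the old implementation to throw somewhere in the round trip. The exception type
    // is left as Exception because the concrete class is not confirmed here.
    test("SPARK-21791 old OrcFileFormat fails on column names with dot") {
      withTempPath { dir =>
        val path = dir.getCanonicalPath
        intercept[Exception] {
          Seq(1, 2, 3).toDF("col.dots").write.orc(path)
          spark.read.orc(path).collect()
        }
      }
    }
    ```
    
    Whether we want to pin the old behavior like this, or only keep the positive test for the new reader, is up to you.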

