Github user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/20266#discussion_r161653628
--- Diff:
sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala ---
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import org.apache.spark.sql.test.SharedSQLContext
+
+class FileBasedDataSourceSuite extends QueryTest with SharedSQLContext {
+ import testImplicits._
+
+ Seq("orc", "parquet", "csv", "json", "text").foreach { format =>
+ test(s"Writing empty datasets should not fail - $format") {
+ withTempDir { dir =>
+
+ Seq("str").toDS.limit(0).write.format(format).save(dir.getCanonicalPath + "/tmp")
+ }
+ }
+ }
+
+ Seq("orc", "parquet", "csv", "json").foreach { format =>
+ test(s"Write and read back unicode schema - $format") {
+ withTempPath { path =>
+ val dir = path.getCanonicalPath
+
+ // scalastyle:off nonascii
+ val df = Seq("a").toDF("한글")
+ // scalastyle:on nonascii
+
+ df.write.format(format).option("header", "true").save(dir)
+ val answerDf = spark.read.format(format).option("header", "true").load(dir)
+
+ assert(df.schema === answerDf.schema)
+ checkAnswer(df, answerDf)
+ }
+ }
+ }
+
+ // Only New OrcFileFormat supports this
+
+ Seq(classOf[org.apache.spark.sql.execution.datasources.orc.OrcFileFormat].getCanonicalName,
--- End diff --
`spark.sql.orc.impl` is native by default, can we just use "orc" here?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]