cloud-fan commented on a change in pull request #23639: [SPARK-26716][SQL]
FileFormat: the supported types of read/write should be consistent
URL: https://github.com/apache/spark/pull/23639#discussion_r251101618
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
##########
@@ -367,69 +367,42 @@ class FileBasedDataSourceSuite extends QueryTest with
SharedSQLContext with Befo
}
test("SPARK-24204 error handling for unsupported Null data types - csv,
parquet, orc") {
- withTempDir { dir =>
- val tempDir = new File(dir, "files").getCanonicalPath
-
- Seq("orc").foreach { format =>
- // write path
- var msg = intercept[AnalysisException] {
- sql("select
null").write.format(format).mode("overwrite").save(tempDir)
- }.getMessage
- assert(msg.toLowerCase(Locale.ROOT)
- .contains(s"$format data source does not support null data type."))
-
- msg = intercept[AnalysisException] {
- spark.udf.register("testType", () => new NullData())
- sql("select
testType()").write.format(format).mode("overwrite").save(tempDir)
- }.getMessage
- assert(msg.toLowerCase(Locale.ROOT)
- .contains(s"$format data source does not support null data type."))
-
- // read path
- // We expect the types below should be passed for
backward-compatibility
-
- // Null type
- var schema = StructType(StructField("a", NullType, true) :: Nil)
- spark.range(1).write.format(format).mode("overwrite").save(tempDir)
- spark.read.schema(schema).format(format).load(tempDir).collect()
-
- // UDT having null data
- schema = StructType(StructField("a", new NullUDT(), true) :: Nil)
- spark.range(1).write.format(format).mode("overwrite").save(tempDir)
- spark.read.schema(schema).format(format).load(tempDir).collect()
- }
-
- Seq("parquet", "csv").foreach { format =>
- // write path
- var msg = intercept[AnalysisException] {
- sql("select
null").write.format(format).mode("overwrite").save(tempDir)
- }.getMessage
- assert(msg.toLowerCase(Locale.ROOT)
- .contains(s"$format data source does not support null data type."))
-
- msg = intercept[AnalysisException] {
- spark.udf.register("testType", () => new NullData())
- sql("select
testType()").write.format(format).mode("overwrite").save(tempDir)
- }.getMessage
- assert(msg.toLowerCase(Locale.ROOT)
- .contains(s"$format data source does not support null data type."))
-
- // read path
- msg = intercept[AnalysisException] {
- val schema = StructType(StructField("a", NullType, true) :: Nil)
- spark.range(1).write.format(format).mode("overwrite").save(tempDir)
- spark.read.schema(schema).format(format).load(tempDir).collect()
- }.getMessage
- assert(msg.toLowerCase(Locale.ROOT)
- .contains(s"$format data source does not support null data type."))
-
- msg = intercept[AnalysisException] {
- val schema = StructType(StructField("a", new NullUDT(), true) :: Nil)
- spark.range(1).write.format(format).mode("overwrite").save(tempDir)
- spark.read.schema(schema).format(format).load(tempDir).collect()
- }.getMessage
- assert(msg.toLowerCase(Locale.ROOT)
- .contains(s"$format data source does not support null data type."))
+ withSQLConf(SQLConf.USE_V1_SOURCE_READER_LIST.key -> "orc") {
Review comment:
AFAIK the corresponding check is not implemented in the ORC v2 source yet; if we
don't disable v2 here, we will see runtime errors. Shall we leave a TODO here
saying that this check should be done in the ORC v2 source as well?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]