cloud-fan commented on a change in pull request #23639: [SPARK-26716][SQL]
FileFormat: the supported types of read/write should be consistent
URL: https://github.com/apache/spark/pull/23639#discussion_r251101618
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
##########
@@ -367,69 +367,42 @@ class FileBasedDataSourceSuite extends QueryTest with
SharedSQLContext with Befo
}
test("SPARK-24204 error handling for unsupported Null data types - csv,
parquet, orc") {
- withTempDir { dir =>
- val tempDir = new File(dir, "files").getCanonicalPath
-
- Seq("orc").foreach { format =>
- // write path
- var msg = intercept[AnalysisException] {
- sql("select
null").write.format(format).mode("overwrite").save(tempDir)
- }.getMessage
- assert(msg.toLowerCase(Locale.ROOT)
- .contains(s"$format data source does not support null data type."))
-
- msg = intercept[AnalysisException] {
- spark.udf.register("testType", () => new NullData())
- sql("select
testType()").write.format(format).mode("overwrite").save(tempDir)
- }.getMessage
- assert(msg.toLowerCase(Locale.ROOT)
- .contains(s"$format data source does not support null data type."))
-
- // read path
- // We expect the types below should be passed for
backward-compatibility
-
- // Null type
- var schema = StructType(StructField("a", NullType, true) :: Nil)
- spark.range(1).write.format(format).mode("overwrite").save(tempDir)
- spark.read.schema(schema).format(format).load(tempDir).collect()
-
- // UDT having null data
- schema = StructType(StructField("a", new NullUDT(), true) :: Nil)
- spark.range(1).write.format(format).mode("overwrite").save(tempDir)
- spark.read.schema(schema).format(format).load(tempDir).collect()
- }
-
- Seq("parquet", "csv").foreach { format =>
- // write path
- var msg = intercept[AnalysisException] {
- sql("select
null").write.format(format).mode("overwrite").save(tempDir)
- }.getMessage
- assert(msg.toLowerCase(Locale.ROOT)
- .contains(s"$format data source does not support null data type."))
-
- msg = intercept[AnalysisException] {
- spark.udf.register("testType", () => new NullData())
- sql("select
testType()").write.format(format).mode("overwrite").save(tempDir)
- }.getMessage
- assert(msg.toLowerCase(Locale.ROOT)
- .contains(s"$format data source does not support null data type."))
-
- // read path
- msg = intercept[AnalysisException] {
- val schema = StructType(StructField("a", NullType, true) :: Nil)
- spark.range(1).write.format(format).mode("overwrite").save(tempDir)
- spark.read.schema(schema).format(format).load(tempDir).collect()
- }.getMessage
- assert(msg.toLowerCase(Locale.ROOT)
- .contains(s"$format data source does not support null data type."))
-
- msg = intercept[AnalysisException] {
- val schema = StructType(StructField("a", new NullUDT(), true) :: Nil)
- spark.range(1).write.format(format).mode("overwrite").save(tempDir)
- spark.read.schema(schema).format(format).load(tempDir).collect()
- }.getMessage
- assert(msg.toLowerCase(Locale.ROOT)
- .contains(s"$format data source does not support null data type."))
+ withSQLConf(SQLConf.USE_V1_SOURCE_READER_LIST.key -> "orc") {
Review comment:
AFAIK the corresponding check is not implemented in the ORC v2 source yet; if we
don't disable v2 here, we will see runtime errors. Shall we leave a TODO here
saying that this check should be done in the ORC v2 source as well?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]