Github user gatorsmile commented on a diff in the pull request:
https://github.com/apache/spark/pull/20302#discussion_r162682668
--- Diff:
sql/hive/src/test/scala/org/apache/spark/sql/sources/JsonHadoopFsRelationSuite.scala
---
@@ -105,4 +107,36 @@ class JsonHadoopFsRelationSuite extends HadoopFsRelationTest {
)
}
}
+
+ test("invalid json with leading nulls - from file (multiLine=true)") {
+ import testImplicits._
+ withTempDir { tempDir =>
+ val path = tempDir.getAbsolutePath
+ Seq(badJson, """{"a":1}""").toDS().write.mode("overwrite").text(path)
+ val expected = s"""$badJson\n{"a":1}\n"""
+ val schema = new StructType().add("a", IntegerType).add("_corrupt_record", StringType)
+ val df =
+ spark.read.format(dataSourceName).option("multiLine", true).schema(schema).load(path)
+ checkAnswer(df, Row(null, expected))
+ }
+ }
+
+ test("invalid json with leading nulls - from file (multiLine=false)") {
+ import testImplicits._
+ withTempDir { tempDir =>
+ val path = tempDir.getAbsolutePath
+ Seq(badJson, """{"a":1}""").toDS().write.mode("overwrite").text(path)
+ val schema = new StructType().add("a", IntegerType).add("_corrupt_record", StringType)
+ val df =
+ spark.read.format(dataSourceName).option("multiLine", false).schema(schema).load(path)
+ checkAnswer(df, Seq(Row(1, null), Row(null, badJson)))
+ }
+ }
+
+ test("invalid json with leading nulls - from dataset") {
--- End diff --
See the PR https://github.com/apache/spark/pull/20331
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org