[ https://issues.apache.org/jira/browse/SPARK-23428?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Xiao Li updated SPARK-23428:
Description:
{noformat}
test("invalid json with leading nulls - from dataset") {
import testImplicits._
withTempDir { tempDir =>
val path = tempDir.getAbsolutePath
Seq("""{"firstName":"Chris", "lastName":"Baird"}""",
"""{"firstName":"Doug",
"lastName":"Rood"}""").toDS().write.mode("overwrite").text(path)
val schema = new StructType().add("a",
IntegerType).add("_corrupt_record", StringType)
val jsonDF = spark.read.schema(schema).option("mode",
"DROPMALFORMED").json(path)
checkAnswer(jsonDF, Seq(
Row("Chris", "Baird"), Row("Doug", "Rood")
))
}
}
{noformat}
After this PR it returns a wrong answer.
{noformat}
[null,null]
[null,null]
{noformat}
was:
test("invalid json with leading nulls - from dataset") {
import testImplicits._
withTempDir { tempDir =>
val path = tempDir.getAbsolutePath
Seq("""{"firstName":"Chris", "lastName":"Baird"}""",
"""{"firstName":"Doug",
"lastName":"Rood"}""").toDS().write.mode("overwrite").text(path)
val schema = new StructType().add("a",
IntegerType).add("_corrupt_record", StringType)
val jsonDF = spark.read.schema(schema).option("mode",
"DROPMALFORMED").json(path)
checkAnswer(jsonDF, Seq(
Row("Chris", "Baird"), Row("Doug", "Rood")
))
}
}
Now it returns
{noformat}
[null,null]
[null,null]
{noformat}
> Revert
> ---
>
> Key: SPARK-23428
> URL: https://issues.apache.org/jira/browse/SPARK-23428
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 2.3.0
> Reporter: Xiao Li
> Assignee: Xiao Li
> Priority: Blocker
>
> {noformat}
> test("invalid json with leading nulls - from dataset") {
> import testImplicits._
> withTempDir { tempDir =>
> val path = tempDir.getAbsolutePath
> Seq("""{"firstName":"Chris", "lastName":"Baird"}""",
> """{"firstName":"Doug",
> "lastName":"Rood"}""").toDS().write.mode("overwrite").text(path)
> val schema = new StructType().add("a",
> IntegerType).add("_corrupt_record", StringType)
> val jsonDF = spark.read.schema(schema).option("mode",
> "DROPMALFORMED").json(path)
> checkAnswer(jsonDF, Seq(
> Row("Chris", "Baird"), Row("Doug", "Rood")
> ))
> }
> }
> {noformat}
> After this PR it returns a wrong answer.
> {noformat}
> [null,null]
> [null,null]
> {noformat}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)
-
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org