GitHub user gatorsmile commented on a diff in the pull request:
https://github.com/apache/spark/pull/18865#discussion_r137930040
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala ---
@@ -2034,4 +2034,34 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData {
       }
     }
   }
+
+  test("SPARK-21610: Corrupt records are not handled properly when creating a dataframe " +
+    "from a file") {
+    withTempPath { dir =>
+      val path = dir.getCanonicalPath
+      val data =
+        """{"field": 1}
+          |{"field": 2}
+          |{"field": "3"}""".stripMargin
+      Seq(data).toDF().repartition(1).write.text(path)
+      val schema = new StructType().add("field", ByteType).add("_corrupt_record", StringType)
+      val expectedErrorMsg = "'_corrupt_record' cannot be selected alone"
+      var msg = intercept[AnalysisException] {
+        spark.read.schema(schema).json(path).select("_corrupt_record").collect()
+      }.getMessage
+      assert(msg.contains(expectedErrorMsg))
+      // negative cases
+      msg = intercept[AnalysisException] {
+        spark.read.schema(schema).json(path).select("_corrupt_record").show()
--- End diff ---
You already have the negative case that uses `collect()` above; no need to repeat it with `show()` here.
---
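For readers following along, here is a minimal standalone sketch of the scenario the test exercises, outside the test harness. It assumes a Spark build that includes this change; the `local[1]` master, the temp-directory handling, and the `cache()` workaround at the end are my additions and are not part of the diff, and only the `collect()` action is kept for the negative case, per the comment above.

import org.apache.spark.sql.{AnalysisException, SparkSession}
import org.apache.spark.sql.types.{ByteType, StringType, StructType}

object CorruptRecordSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[1]")
      .appName("corrupt-record-sketch")
      .getOrCreate()
    import spark.implicits._

    // Same data and schema as in the test from the diff.
    val path = java.nio.file.Files.createTempDirectory("spark-21610-").resolve("json").toString
    val data =
      """{"field": 1}
        |{"field": 2}
        |{"field": "3"}""".stripMargin
    Seq(data).toDF().repartition(1).write.text(path)

    val schema = new StructType()
      .add("field", ByteType)
      .add("_corrupt_record", StringType)

    // Negative case: selecting only "_corrupt_record" from a raw JSON read
    // fails with an AnalysisException. A single action (collect) is enough
    // to exercise it.
    try {
      spark.read.schema(schema).json(path).select("_corrupt_record").collect()
    } catch {
      case e: AnalysisException => println(s"expected failure: ${e.getMessage}")
    }

    // Workaround (my assumption, not part of the diff): cache the parsed
    // DataFrame first, then the corrupt-record column can be queried alone.
    val df = spark.read.schema(schema).json(path).cache()
    df.select("_corrupt_record").show(truncate = false)

    spark.stop()
  }
}

Caching sidesteps the restriction because the cached plan materializes the full schema, so the underlying file scan is never asked for the corrupt-record column on its own.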