Github user MaxGekk commented on a diff in the pull request:
https://github.com/apache/spark/pull/22237#discussion_r212924389
--- Diff:
sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala ---
@@ -469,4 +470,23 @@ class JsonFunctionsSuite extends QueryTest with
SharedSQLContext {
checkAnswer(sql("""select json[0] from jsonTable"""), Seq(Row(null)))
}
+
+ test("from_json invalid json - check modes") {
+ val df = Seq("""{"a" 1}""", """{"a": 2}""").toDS()
+ val schema = new StructType().add("a", IntegerType)
+
+ checkAnswer(
+ df.select(from_json($"value", schema, Map("mode" -> "PERMISSIVE"))),
+ Row(Row(null)) :: Row(Row(2)) :: Nil)
+
+ val exceptionOne = intercept[SparkException] {
+ df.select(from_json($"value", schema, Map("mode" ->
"FAILFAST"))).collect()
+ }.getMessage
+ assert(exceptionOne.contains(
+ "Malformed records are detected in record parsing. Parse Mode:
FAILFAST."))
+
+ checkAnswer(
+ df.select(from_json($"value", schema, Map("mode" ->
"DROPMALFORMED"))),
+ Row(null) :: Row(Row(2)) :: Nil)
--- End diff --
The `DROPMALFORMED` mode returns `null` for malformed JSON lines. Users can
filter them out later. @HyukjinKwon Do you know how to drop rows in
`UnaryExpression`s?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]