GitHub user MaxGekk commented on a diff in the pull request:
https://github.com/apache/spark/pull/20929#discussion_r190399084
--- Diff:
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
---
@@ -2408,4 +2408,24 @@ class JsonSuite extends QueryTest with
SharedSQLContext with TestJsonData {
spark.read.option("mode", "PERMISSIVE").option("encoding",
"UTF-8").json(Seq(badJson).toDS()),
Row(badJson))
}
+
+ test("SPARK-23772 ignore column of all null values or empty array during
schema inference") {
+ withTempPath { tempDir =>
+ val path = tempDir.getAbsolutePath
+ Seq(
+ """{"a":null, "b":[null, null], "c":null, "d":[[], [null]],
"e":{}}""",
+ """{"a":null, "b":[null], "c":[], "d": [null, []], "e":{}}""",
+ """{"a":null, "b":[], "c":[], "d": null, "e":null}""")
+ .toDS().write.mode("overwrite").text(path)
--- End diff --
Do you need the `overwrite` mode?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org