Github user HyukjinKwon commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20929#discussion_r190447608
  
    --- Diff: 
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
 ---
    @@ -2408,4 +2408,24 @@ class JsonSuite extends QueryTest with 
SharedSQLContext with TestJsonData {
           spark.read.option("mode", "PERMISSIVE").option("encoding", 
"UTF-8").json(Seq(badJson).toDS()),
           Row(badJson))
       }
    +
    +  test("SPARK-23772 ignore column of all null values or empty array during 
schema inference") {
    +     withTempPath { tempDir =>
    +      val path = tempDir.getAbsolutePath
    +      Seq(
    +        """{"a":null, "b":[null, null], "c":null, "d":[[], [null]], 
"e":{}}""",
    +        """{"a":null, "b":[null], "c":[], "d": [null, []], "e":{}}""",
    +        """{"a":null, "b":[], "c":[], "d": null, "e":null}""")
    +        .toDS().write.mode("overwrite").text(path)
    +      val df = spark.read.format("json")
    +        .option("dropFieldIfAllNull", true)
    +        .load(path)
    +      val expectedSchema = new StructType()
    +        .add("a", NullType).add("b", NullType).add("c", NullType).add("d", 
NullType)
    +        .add("e", NullType)
    +      assert(df.schema === expectedSchema)
    --- End diff --
    
    No, there's no explicit preference between them, since preferences are 
divided even among committers. It's fine to use either of them.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to