viirya commented on a change in pull request #29587:
URL: https://github.com/apache/spark/pull/29587#discussion_r487484664



##########
File path: 
sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala
##########
@@ -103,4 +105,112 @@ class StructTypeSuite extends SparkFunSuite {
     val interval = "`a` INTERVAL"
     assert(fromDDL(interval).toDDL === interval)
   }
+
+  test("find missing (nested) fields") {
+    val schema = StructType.fromDDL(
+      "c1 INT, c2 STRUCT<c3: INT, c4: STRUCT<c5: INT, c6: INT>>")
+    val resolver = SQLConf.get.resolver
+
+    val source1 = StructType.fromDDL("c1 INT")
+    val missing1 = StructType.fromDDL(
+      "c2 STRUCT<c3: INT, c4: STRUCT<c5: INT, c6: INT>>")
+    assert(StructType.findMissingFields(source1, schema, resolver)
+      .exists(_.sameType(missing1)))
+
+    val source2 = StructType.fromDDL("c1 INT, c3 STRING")
+    val missing2 = StructType.fromDDL(
+      "c2 STRUCT<c3: INT, c4: STRUCT<c5: INT, c6: INT>>")
+    assert(StructType.findMissingFields(source2, schema, resolver)
+      .exists(_.sameType(missing2)))
+
+    val source3 = StructType.fromDDL("c1 INT, c2 STRUCT<c3: INT>")
+    val missing3 = StructType.fromDDL(
+      "c2 STRUCT<c4: STRUCT<c5: INT, c6: INT>>")
+    assert(StructType.findMissingFields(source3, schema, resolver)
+      .exists(_.sameType(missing3)))
+
+    val source4 = StructType.fromDDL("c1 INT, c2 STRUCT<c3: INT, c4: 
STRUCT<c6: INT>>")
+    val missing4 = StructType.fromDDL(
+      "c2 STRUCT<c4: STRUCT<c5: INT>>")
+    assert(StructType.findMissingFields(source4, schema, resolver)
+      .exists(_.sameType(missing4)))
+
+    val schemaWithArray = StructType.fromDDL(
+      "c1 INT, c2 ARRAY<STRUCT<c3: INT, c4: LONG>>")
+    val source5 = StructType.fromDDL(
+      "c1 INT")
+    val missing5 = StructType.fromDDL(
+      "c2 ARRAY<STRUCT<c3: INT, c4: LONG>>")
+    assert(
+      StructType.findMissingFields(source5, schemaWithArray, resolver)
+        .exists(_.sameType(missing5)))
+
+    val schemaWithMap1 = StructType.fromDDL(
+      "c1 INT, c2 MAP<STRUCT<c3: INT, c4: LONG>, STRING>, c3 LONG")
+    val source6 = StructType.fromDDL(
+      "c1 INT, c3 LONG")
+    val missing6 = StructType.fromDDL(
+      "c2 MAP<STRUCT<c3: INT, c4: LONG>, STRING>")
+    assert(
+      StructType.findMissingFields(source6, schemaWithMap1, resolver)
+        .exists(_.sameType(missing6)))
+
+    val schemaWithMap2 = StructType.fromDDL(
+      "c1 INT, c2 MAP<STRING, STRUCT<c3: INT, c4: LONG>>, c3 STRING")
+    val source7 = StructType.fromDDL(
+      "c1 INT, c3 STRING")
+    val missing7 = StructType.fromDDL(
+      "c2 MAP<STRING, STRUCT<c3: INT, c4: LONG>>")
+    assert(
+      StructType.findMissingFields(source7, schemaWithMap2, resolver)
+        .exists(_.sameType(missing7)))
+
+    // Unsupported: nested struct in array, map
+    val source8 = StructType.fromDDL(
+      "c1 INT, c2 ARRAY<STRUCT<c3: INT>>")
+    // `findMissingFields` doesn't support looking into nested struct in array 
type.
+    assert(StructType.findMissingFields(source8, schemaWithArray, 
resolver).isEmpty)
+
+    val source9 = StructType.fromDDL(
+      "c1 INT, c2 MAP<STRUCT<c3: INT>, STRING>, c3 LONG")
+    // `findMissingFields` doesn't support looking into nested struct in map 
type.
+    assert(StructType.findMissingFields(source9, schemaWithMap1, 
resolver).isEmpty)
+
+    val source10 = StructType.fromDDL(
+      "c1 INT, c2 MAP<STRING, STRUCT<c3: INT>>, c3 STRING")
+    // `findMissingFields` doesn't support looking into nested struct in map 
type.
+    assert(StructType.findMissingFields(source10, schemaWithMap2, 
resolver).isEmpty)
+  }
+
+  test("find missing (nested) fields: case sensitive cases") {
+    withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
+      val schema = StructType.fromDDL(
+        "c1 INT, c2 STRUCT<c3: INT, C4: STRUCT<C5: INT, c6: INT>>")
+      val resolver = SQLConf.get.resolver
+
+      val source1 = StructType.fromDDL("c1 INT, C2 LONG")
+      val missing1 = StructType.fromDDL(
+        "c2 STRUCT<c3: INT, C4: STRUCT<C5: INT, c6: INT>>")
+      assert(StructType.findMissingFields(source1, schema, resolver)
+        .exists(_.sameType(missing1)))
+
+      val source2 = StructType.fromDDL("c2 LONG")
+      val missing2 = StructType.fromDDL(
+        "c1 INT")

Review comment:
       Oh, sure. I missed that line break.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to