xkrogen commented on a change in pull request #34009:
URL: https://github.com/apache/spark/pull/34009#discussion_r710315449



##########
File path: 
external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSerdeSuite.scala
##########
@@ -120,30 +120,41 @@ class AvroSerdeSuite extends SparkFunSuite {
     }
   }
 
-  test("Fail to convert for serialization with field count mismatch") {
-    // Note that this is allowed for deserialization, but not serialization
-    val tooManyFields =
-      
createAvroSchemaWithTopLevelFields(_.optionalInt("foo").optionalLong("bar"))
-    assertFailedConversionMessage(tooManyFields, Serializer, BY_NAME,
+  test("Fail to convert with missing Catalyst fields") {
+    val nestedFooField = 
SchemaBuilder.record("foo").fields().optionalInt("bar").endRecord()
+    val avroExtraOptional = createAvroSchemaWithTopLevelFields(
+      _.name("foo").`type`(nestedFooField).noDefault().optionalLong("bar"))
+    val avroExtraRequired = createAvroSchemaWithTopLevelFields(
+      _.name("foo").`type`(nestedFooField).noDefault().requiredLong("bar"))
+
+    // serializing with extra _nullable_ Avro field is okay, but fails if 
extra field is required
+    withFieldMatchType(Serializer.create(CATALYST_STRUCT, avroExtraOptional, 
_))
+    assertFailedConversionMessage(avroExtraRequired, Serializer, BY_NAME,
       "Found field 'bar' in Avro schema but there is no match in the SQL 
schema")
-    assertFailedConversionMessage(tooManyFields, Serializer, BY_POSITION,
+    assertFailedConversionMessage(avroExtraRequired, Serializer, BY_POSITION,
       "Found field 'bar' at position 1 of top-level record from Avro schema 
but there is no " +
         "match in the SQL schema at top-level record (using positional 
matching)")
 
-    val tooManyFieldsNested =
+    // deserializing should work regardless of whether the extra field is 
required or not
+    withFieldMatchType(Deserializer.create(CATALYST_STRUCT, avroExtraOptional, 
_))
+    withFieldMatchType(Deserializer.create(CATALYST_STRUCT, avroExtraRequired, 
_))
+
+    val avroExtraNestedOptional =
       createNestedAvroSchemaWithFields("foo", 
_.optionalInt("bar").optionalInt("baz"))
-    assertFailedConversionMessage(tooManyFieldsNested, Serializer, BY_NAME,
+    val avroExtraNestedRequired =
+      createNestedAvroSchemaWithFields("foo", 
_.optionalInt("bar").requiredInt("baz"))
+
+    // serializing with extra _nullable_ Avro field is okay, but fails if 
extra field is required
+    withFieldMatchType(Serializer.create(CATALYST_STRUCT, 
avroExtraNestedOptional, _))
+    assertFailedConversionMessage(avroExtraNestedRequired, Serializer, BY_NAME,
       "Found field 'foo.baz' in Avro schema but there is no match in the SQL 
schema")
-    assertFailedConversionMessage(tooManyFieldsNested, Serializer, BY_POSITION,
+    assertFailedConversionMessage(avroExtraNestedRequired, Serializer, 
BY_POSITION,
       s"Found field 'baz' at position 1 of field 'foo' from Avro schema but 
there is no match " +
         s"in the SQL schema at field 'foo' (using positional matching)")
 
-    val tooFewFields = createAvroSchemaWithTopLevelFields(f => f)
-    assertFailedConversionMessage(tooFewFields, Serializer, BY_NAME,
-      "Cannot find field 'foo' in Avro schema")
-    assertFailedConversionMessage(tooFewFields, Serializer, BY_POSITION,
-      "Cannot find field at position 0 of top-level record from Avro schema " +
-        "(using positional matching)")

Review comment:
       Removed the `tooFewFields` validation step because this is already 
covered by the `Fail to convert with missing nested Avro fields` test




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to