YannByron commented on PR #6788:
URL: https://github.com/apache/hudi/pull/6788#issuecomment-1258869683
@alexeykudinkin yep, I forgot to add a UT for this.
This is a case that triggers the bug I mentioned (modified from
TestAvroSerDe):
```
/**
 * Reproduces the bug discussed in this PR (adapted from TestAvroSerDe).
 *
 * Two HoodieMetadataColumnStats records that differ only in
 * `totalUncompressedSize` are deserialized with the same deserializer. The
 * test then checks that the two results are distinct, and that the first
 * result still serializes back to the first Avro record after the second
 * record has been deserialized.
 */
def testAvroUnionSerDe(): Unit = {
  // Builds a column-stats record; `totalUncompressedSize` is the only field
  // that varies between the two inputs used below.
  def buildColumnStatsRecord(totalUncompressedSize: Long): GenericData.Record = {
    val minValue = new GenericData.Record(IntWrapper.SCHEMA$)
    minValue.put("value", 9)
    val maxValue = new GenericData.Record(IntWrapper.SCHEMA$)
    maxValue.put("value", 10)

    val record = new GenericData.Record(HoodieMetadataColumnStats.SCHEMA$)
    record.put("fileName",
      "9388c460-4ace-4274-9a0b-d44606af60af-0_2-25-35_20220520154514641.parquet")
    record.put("columnName", "c8")
    record.put("minValue", minValue)
    record.put("maxValue", maxValue)
    record.put("valueCount", 10L)
    record.put("nullCount", 0L)
    record.put("totalSize", 94L)
    record.put("totalUncompressedSize", totalUncompressedSize)
    record.put("isDeleted", false)
    record
  }

  val originalAvroRecord = buildColumnStatsRecord(54L)
  // Only `totalUncompressedSize` changes.
  val originalAvroRecord2 = buildColumnStatsRecord(55L)

  val avroSchema = HoodieMetadataColumnStats.SCHEMA$
  val SchemaType(catalystSchema, _) = SchemaConverters.toSqlType(avroSchema)

  val deserializer = sparkAdapter.createAvroDeserializer(avroSchema, catalystSchema)
  val serializer = sparkAdapter.createAvroSerializer(catalystSchema, avroSchema, nullable = false)

  val row = deserializer.deserialize(originalAvroRecord).get
  // Deserialize originalAvroRecord2 with the same deserializer instance.
  val row2 = deserializer.deserialize(originalAvroRecord2).get

  // Without this PR, row and row2 are the same object.
  assert(row != row2)

  // The first row must still round-trip to the first record.
  val deserializedAvroRecord = serializer.serialize(row)
  assertEquals(originalAvroRecord, deserializedAvroRecord)
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]