jzhuge opened a new issue #2774:
URL: https://github.com/apache/iceberg/issues/2774
When `nameMapping` is null, the projected schema is automatically inferred from
the read schema. We discovered issues when the read schema contains extra nested
structs that are not present in the written data.
1. When there are two extra nested structs: the projected schema cannot be
serialized to JSON because the two nested struct types end up with the same
name `rnull`
2. When there is one extra nested struct: JSON serialization succeeds, but
the schema type name `rnull` still does not seem right
Unit tests:
```java
/**
 * Writes a record with an id-only schema and reads it back with a read schema
 * that adds one optional nested struct ("location"), then validates the
 * projected record and its schema.
 */
@Test
public void testInferredMappingNestedStruct() throws IOException {
  // The written data only carries the required "id" column.
  Schema fileSchema = new Schema(
      Types.NestedField.required(0, "id", Types.LongType.get()));

  Record written = new Record(AvroSchemaUtil.convert(fileSchema, "table"));
  written.put("id", 34L);

  // The read schema adds an optional struct that the written data does not have.
  Types.StructType locationType = Types.StructType.of(
      Types.NestedField.required(1, "lat", Types.FloatType.get()),
      Types.NestedField.required(2, "long", Types.FloatType.get()));
  Schema expectedSchema = new Schema(
      Types.NestedField.required(0, "id", Types.LongType.get()),
      Types.NestedField.optional(3, "location", locationType));

  // NOTE(review): the trailing null is presumably the name mapping, so the
  // mapping is inferred from the read schema -- confirm against writeAndRead.
  Record result = writeAndRead(fileSchema, expectedSchema, written, null);

  AvroTestHelpers.assertEquals(fileSchema.asStruct(), written, result);
  validateJsonSerialization(result);
  validateNewNestedStruct(result, expectedSchema.findField("location"));
}
/**
 * Writes a record with an id-only schema and reads it back with a read schema
 * that adds two optional nested structs ("location" and "address"), then
 * validates the projected record and its schema.
 */
@Test
public void testInferredMappingTwoNestedStructs() throws IOException {
  // The written data only carries the required "id" column.
  Schema fileSchema = new Schema(
      Types.NestedField.required(0, "id", Types.LongType.get()));

  Record written = new Record(AvroSchemaUtil.convert(fileSchema, "table"));
  written.put("id", 34L);

  // Two struct types that exist only in the read schema.
  Types.StructType locationType = Types.StructType.of(
      Types.NestedField.required(1, "lat", Types.FloatType.get()),
      Types.NestedField.required(2, "long", Types.FloatType.get()));
  Types.StructType addressType = Types.StructType.of(
      Types.NestedField.required(5, "street", Types.IntegerType.get()));
  Schema expectedSchema = new Schema(
      Types.NestedField.required(0, "id", Types.LongType.get()),
      Types.NestedField.optional(3, "location", locationType),
      Types.NestedField.optional(4, "address", addressType));

  // NOTE(review): the trailing null is presumably the name mapping, so the
  // mapping is inferred from the read schema -- confirm against writeAndRead.
  Record result = writeAndRead(fileSchema, expectedSchema, written, null);

  AvroTestHelpers.assertEquals(fileSchema.asStruct(), written, result);
  validateJsonSerialization(result);
  validateNewNestedStruct(result, expectedSchema.findField("location"));
  validateNewNestedStruct(result, expectedSchema.findField("address"));
}
/**
 * Asserts that rendering the projected record's schema with {@code toString()}
 * yields a non-empty string (the JSON form, per the assertion message).
 *
 * @param projected the record returned by the read path
 */
private void validateJsonSerialization(Record projected) {
  String renderedSchema = projected.getSchema().toString();
  Assert.assertFalse("Projected schema can be serialized to json", renderedSchema.isEmpty());
}
private void validateNewNestedStruct(Record projected, Types.NestedField
field) {
String newTypeName = "r" + field.fieldId();
String newFieldName = field.name() + "_" + newTypeName;
Assert.assertNull(field.name() + " field should be null",
projected.get(newFieldName));
Assert.assertNotNull(field.name() + " field is renamed to " +
newFieldName,
projected.getSchema().getField(newFieldName));
Assert.assertEquals(field.name() + " field should have schema type name
" + newTypeName,
newTypeName,
projected.getSchema().getField(newFieldName).schema().getTypes().get(1).getName());
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]