the-other-tim-brown commented on code in PR #17581:
URL: https://github.com/apache/hudi/pull/17581#discussion_r2639983864
##########
hudi-common/src/test/java/org/apache/hudi/common/schema/TestHoodieSchemaUtils.java:
##########
@@ -1539,4 +1540,202 @@ public void
testConvertValueForSpecificDataTypes_UnionWithNull() {
assertTrue(result instanceof LocalDate);
assertEquals(LocalDate.of(2023, 1, 1), result);
}
+
+ @Test
+ void testResolveUnionSchemaWithNonUnionSchema() {
+ // Non-union schemas should be returned as-is
+ HoodieSchema stringSchema = HoodieSchema.create(HoodieSchemaType.STRING);
+ HoodieSchema result = HoodieSchemaUtils.resolveUnionSchema(stringSchema,
"any");
+
+ assertSame(stringSchema, result);
+ }
+
+ @Test
+ void testResolveUnionSchemaWithSimpleNullableUnion() {
+ // Simple nullable union: ["null", "string"] should return the non-null
type efficiently
+ HoodieSchema nullableString =
HoodieSchema.createNullable(HoodieSchema.create(HoodieSchemaType.STRING));
+ HoodieSchema result = HoodieSchemaUtils.resolveUnionSchema(nullableString,
"string");
+
+ assertEquals(HoodieSchemaType.STRING, result.getType());
+ }
+
+ @Test
+ void testResolveUnionSchemaWithSimpleNullableRecord() {
+ // Test with nullable record type
+ HoodieSchema personSchema = HoodieSchema.createRecord(
+ "Person",
+ null,
+ null,
+ Collections.singletonList(
+ HoodieSchemaField.of("name",
HoodieSchema.create(HoodieSchemaType.STRING))
+ )
+ );
+
+ HoodieSchema nullablePerson = HoodieSchema.createNullable(personSchema);
+ HoodieSchema result = HoodieSchemaUtils.resolveUnionSchema(nullablePerson,
"Person");
+
+ assertEquals(HoodieSchemaType.RECORD, result.getType());
+ assertEquals("Person", result.getName());
+ assertFalse(result.isNullable());
+ }
+
+ @Test
+ void testResolveUnionSchemaWithComplexUnionMatchingFullName() {
+ // Complex union with 3+ types, matching by fullName
+ String unionSchemaJson = "{"
+ + "\"type\":\"record\","
+ + "\"name\":\"Container\","
+ + "\"fields\":[{"
+ + " \"name\":\"data\","
+ + " \"type\":[\"null\","
+ + "
{\"type\":\"record\",\"name\":\"PersonRecord\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"}]},"
+ + "
{\"type\":\"record\",\"name\":\"CompanyRecord\",\"fields\":[{\"name\":\"companyName\",\"type\":\"string\"}]}"
+ + " ]"
+ + "}]}";
+
+ HoodieSchema containerSchema = HoodieSchema.parse(unionSchemaJson);
+ HoodieSchema dataFieldSchema =
containerSchema.getField("data").get().schema();
+
+ // Resolve to PersonRecord
+ HoodieSchema personResult =
HoodieSchemaUtils.resolveUnionSchema(dataFieldSchema, "PersonRecord");
+ assertEquals(HoodieSchemaType.RECORD, personResult.getType());
+ assertEquals("PersonRecord", personResult.getName());
+ assertFalse(personResult.isNullable());
+ assertTrue(personResult.getField("name").isPresent());
+
+ // Resolve to CompanyRecord
+ HoodieSchema companyResult =
HoodieSchemaUtils.resolveUnionSchema(dataFieldSchema, "CompanyRecord");
+ assertEquals(HoodieSchemaType.RECORD, companyResult.getType());
+ assertEquals("CompanyRecord", companyResult.getName());
+ assertFalse(companyResult.isNullable());
+ assertTrue(companyResult.getField("companyName").isPresent());
+ }
+
+ @Test
+ void testResolveUnionSchemaWithNonNullableTwoTypeUnion() {
+ // Union of two non-nullable types should use the complex resolution path
+ String unionSchemaJson = "{"
+ + "\"type\":\"record\","
+ + "\"name\":\"Container\","
+ + "\"fields\":[{"
+ + " \"name\":\"data\","
+ + " \"type\":["
+ + "
{\"type\":\"record\",\"name\":\"TypeA\",\"fields\":[{\"name\":\"fieldA\",\"type\":\"string\"}]},"
+ + "
{\"type\":\"record\",\"name\":\"TypeB\",\"fields\":[{\"name\":\"fieldB\",\"type\":\"int\"}]}"
+ + " ]"
+ + "}]}";
+
+ HoodieSchema containerSchema = HoodieSchema.parse(unionSchemaJson);
+ HoodieSchema dataFieldSchema =
containerSchema.getField("data").get().schema();
+
+ // Resolve to TypeA
+ HoodieSchema typeAResult =
HoodieSchemaUtils.resolveUnionSchema(dataFieldSchema, "TypeA");
+ assertEquals(HoodieSchemaType.RECORD, typeAResult.getType());
+ assertEquals("TypeA", typeAResult.getName());
+ assertFalse(typeAResult.isNullable());
+
+ // Resolve to TypeB
+ HoodieSchema typeBResult =
HoodieSchemaUtils.resolveUnionSchema(dataFieldSchema, "TypeB");
+ assertEquals(HoodieSchemaType.RECORD, typeBResult.getType());
+ assertEquals("TypeB", typeBResult.getName());
+ assertFalse(typeBResult.isNullable());
+ }
+
+ @Test
+ void testResolveUnionSchemaThrowsExceptionWhenNoMatch() {
+ // Complex union where the requested fullName doesn't match any type
+ String unionSchemaJson = "{"
+ + "\"type\":\"record\","
+ + "\"name\":\"Container\","
+ + "\"fields\":[{"
+ + " \"name\":\"data\","
+ + " \"type\":[\"null\","
+ + "
{\"type\":\"record\",\"name\":\"PersonRecord\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"}]},"
+ + "
{\"type\":\"record\",\"name\":\"CompanyRecord\",\"fields\":[{\"name\":\"companyName\",\"type\":\"string\"}]}"
+ + " ]"
+ + "}]}";
+
+ HoodieSchema containerSchema = HoodieSchema.parse(unionSchemaJson);
+ HoodieSchema dataFieldSchema =
containerSchema.getField("data").get().schema();
+
+ // Try to resolve to a type that doesn't exist in the union
+ org.apache.hudi.internal.schema.HoodieSchemaException exception =
assertThrows(
+ org.apache.hudi.internal.schema.HoodieSchemaException.class,
+ () -> HoodieSchemaUtils.resolveUnionSchema(dataFieldSchema,
"AnimalRecord")
+ );
+
+ assertTrue(exception.getMessage().contains("Unsupported UNION type"));
+ assertTrue(exception.getMessage().contains("Only UNION of a null type and
a non-null type is supported"));
+ }
+
+ @Test
+ void testResolveUnionSchemaWithNamespacedRecords() {
+ // Test with fully qualified names (with namespace)
+ String unionSchemaJson = "{"
+ + "\"type\":\"record\","
+ + "\"name\":\"Container\","
+ + "\"namespace\":\"com.example\","
+ + "\"fields\":[{"
+ + " \"name\":\"data\","
+ + " \"type\":[\"null\","
+ + "
{\"type\":\"record\",\"name\":\"Person\",\"namespace\":\"com.example.model\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"}]},"
+ + "
{\"type\":\"record\",\"name\":\"Company\",\"namespace\":\"com.example.model\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"}]}"
+ + " ]"
+ + "}]}";
+
+ HoodieSchema containerSchema = HoodieSchema.parse(unionSchemaJson);
+ HoodieSchema dataFieldSchema =
containerSchema.getField("data").get().schema();
+
+ // Resolve using fully qualified name
+ HoodieSchema personResult =
HoodieSchemaUtils.resolveUnionSchema(dataFieldSchema,
"com.example.model.Person");
+ assertEquals(HoodieSchemaType.RECORD, personResult.getType());
+ assertEquals("Person", personResult.getName());
+ assertFalse(personResult.isNullable());
+ assertEquals("com.example.model", personResult.getNamespace().get());
+
+ // Resolve Company
+ HoodieSchema companyResult =
HoodieSchemaUtils.resolveUnionSchema(dataFieldSchema,
"com.example.model.Company");
+ assertEquals(HoodieSchemaType.RECORD, companyResult.getType());
+ assertEquals("Company", companyResult.getName());
+ assertFalse(companyResult.isNullable());
+ }
+
+ @Test
+ void testResolveUnionSchemaWithPrimitiveTypes() {
+ // Test union containing primitive types (although less common)
+ // Union of null and string, but passed through the full name matching path
+ HoodieSchema nullableString =
HoodieSchema.createNullable(HoodieSchema.create(HoodieSchemaType.STRING));
+
+ // For simple 2-element nullable union, should use fast path
+ HoodieSchema result = HoodieSchemaUtils.resolveUnionSchema(nullableString,
"string");
+ assertEquals(HoodieSchemaType.STRING, result.getType());
+ }
+
+ @Test
+ void testResolveUnionSchemaConsistencyWithOriginalAvroImpl() {
+ // Verify that HoodieSchemaUtils.resolveUnionSchema produces equivalent
results to the original AvroSchemaUtils.resolveUnionSchema
+ String unionSchemaJson = "{"
+ + "\"type\":\"record\","
+ + "\"name\":\"TestRecord\","
+ + "\"fields\":[{"
+ + " \"name\":\"unionField\","
+ + " \"type\":[\"null\","
+ + "
{\"type\":\"record\",\"name\":\"TypeA\",\"fields\":[{\"name\":\"a\",\"type\":\"int\"}]},"
+ + "
{\"type\":\"record\",\"name\":\"TypeB\",\"fields\":[{\"name\":\"b\",\"type\":\"string\"}]}"
+ + " ]"
+ + "}]}";
+
+ Schema avroSchema = new Schema.Parser().parse(unionSchemaJson);
+ HoodieSchema hoodieSchema = HoodieSchema.parse(unionSchemaJson);
+
+ Schema avroFieldSchema = avroSchema.getField("unionField").schema();
+ HoodieSchema hoodieFieldSchema =
hoodieSchema.getField("unionField").get().schema();
+
+ // Resolve using both implementations
+ Schema avroResult = AvroSchemaUtils.resolveUnionSchema(avroFieldSchema,
"TypeA");
+ HoodieSchema hoodieResult =
HoodieSchemaUtils.resolveUnionSchema(hoodieFieldSchema, "TypeA");
+
+ // Should produce equivalent schemas
+ assertEquals(avroResult.toString(), hoodieResult.toString());
Review Comment:
Bump on this: it does not appear to be in the latest commit.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]