This is an automated email from the ASF dual-hosted git repository.

yihua pushed a commit to branch release-1.1.0
in repository https://gitbox.apache.org/repos/asf/hudi.git
commit 119ee0cffa36cb784ffb60748f52b56c97b8857b Author: Shuo Cheng <[email protected]> AuthorDate: Sat Oct 18 16:54:55 2025 +0800 fix: Ignore field nullability while checking whether record should be rewritten in COW write path (#14094) --- .../java/org/apache/hudi/avro/AvroSchemaUtils.java | 11 +++++-- .../org/apache/hudi/avro/TestAvroSchemaUtils.java | 38 ++++++++++++++++++++-- 2 files changed, 45 insertions(+), 4 deletions(-) diff --git a/hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaUtils.java b/hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaUtils.java index 33f8ce9bf814..4e51e43ebde1 100644 --- a/hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaUtils.java @@ -175,10 +175,17 @@ public class AvroSchemaUtils { * </ol> */ public static boolean isStrictProjectionOf(Schema sourceSchema, Schema targetSchema) { - return isProjectionOfInternal(sourceSchema, targetSchema, AvroSchemaUtils::isAtomicTypeEquals); + return isProjectionOfInternal(sourceSchema, targetSchema, AvroSchemaUtils::isAtomicTypeProjectable); } - private static boolean isAtomicTypeEquals(Schema source, Schema target) { + private static boolean isAtomicTypeProjectable(Schema source, Schema target) { + // ignore nullability for projectable checking + source = resolveNullableSchema(source); + target = resolveNullableSchema(target); + if (source.getType() == Schema.Type.ENUM && target.getType() == Schema.Type.STRING + || source.getType() == Schema.Type.STRING && target.getType() == Schema.Type.ENUM) { + return true; + } // ignore name/namespace for FIXED type if (source.getType() == Schema.Type.FIXED && target.getType() == Schema.Type.FIXED) { return source.getLogicalType().equals(target.getLogicalType()) diff --git a/hudi-common/src/test/java/org/apache/hudi/avro/TestAvroSchemaUtils.java b/hudi-common/src/test/java/org/apache/hudi/avro/TestAvroSchemaUtils.java index a4480225cab0..41b2d1cf1be8 100644 --- 
a/hudi-common/src/test/java/org/apache/hudi/avro/TestAvroSchemaUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/avro/TestAvroSchemaUtils.java @@ -25,6 +25,8 @@ import org.apache.hudi.exception.SchemaCompatibilityException; import org.apache.avro.LogicalTypes; import org.apache.avro.Schema; +import org.apache.parquet.avro.AvroSchemaConverter; +import org.apache.parquet.schema.MessageType; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; @@ -65,7 +67,23 @@ public class TestAvroSchemaUtils { + " \"scale\" : 2\n" + " }],\n" + " \"default\" : null\n" - + " },\n" + + " },\n" + + " {\n" + + " \"name\" : \"arrayInt\",\n" + + " \"type\" : [ \"null\", {\n" + + " \"type\" : \"array\",\n" + + " \"items\" : [ \"null\", \"int\" ]\n" + + " } ],\n" + + " \"default\" : null\n" + + " },\n" + + " {\n" + + " \"name\" : \"mapStrInt\",\n" + + " \"type\" : [ \"null\", {\n" + + " \"type\" : \"map\",\n" + + " \"values\" : [ \"null\", \"int\" ]\n" + + " } ],\n" + + " \"default\" : null\n" + + " },\n" + " {\n" + " \"name\": \"nested_record\",\n" + " \"type\": {\n" @@ -82,7 +100,16 @@ public class TestAvroSchemaUtils { + " }\n" + " ]\n" + " }\n" - + " }\n" + + " },\n" + + " { \n" + + " \"name\" : \"f_enum\",\n" + + " \"type\" : [ \"null\", {\n" + + " \"type\" : \"enum\",\n" + + " \"name\" : \"Visibility\",\n" + + " \"namespace\" : \"common.Types\",\n" + + " \"symbols\" : [ \"UNKNOWN\", \"PUBLIC\", \"PRIVATE\", \"SHARED\" ]\n" + + " }]\n" + + " }\n" + " ]\n" + "}\n"; @@ -219,6 +246,13 @@ public class TestAvroSchemaUtils { AvroSchemaUtils.isStrictProjectionOf( Schema.createUnion(Schema.create(Schema.Type.NULL), sourceSchema), Schema.createUnion(Schema.create(Schema.Type.NULL), projectedNestedSchema))); + + // Case #5: Validate project with field nullability changed + // Note: for array type, the nullability of element's type will be changed after conversion: + // AvroSchemaConverter: Avro Schema 
-> Parquet MessageType -> Avro Schema + MessageType messageType = new AvroSchemaConverter().convert(sourceSchema); + Schema converted = new AvroSchemaConverter().convert(messageType); + assertTrue(AvroSchemaUtils.isStrictProjectionOf(sourceSchema, converted)); } @Test
