This is an automated email from the ASF dual-hosted git repository.

yihua pushed a commit to branch release-1.1.0
in repository https://gitbox.apache.org/repos/asf/hudi.git

commit 119ee0cffa36cb784ffb60748f52b56c97b8857b
Author: Shuo Cheng <[email protected]>
AuthorDate: Sat Oct 18 16:54:55 2025 +0800

    fix: Ignore field nullability while checking whether record should be rewritten in COW write path (#14094)
---
 .../java/org/apache/hudi/avro/AvroSchemaUtils.java | 11 +++++--
 .../org/apache/hudi/avro/TestAvroSchemaUtils.java  | 38 ++++++++++++++++++++--
 2 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaUtils.java b/hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaUtils.java
index 33f8ce9bf814..4e51e43ebde1 100644
--- a/hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaUtils.java
+++ b/hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaUtils.java
@@ -175,10 +175,17 @@ public class AvroSchemaUtils {
    * </ol>
    */
   public static boolean isStrictProjectionOf(Schema sourceSchema, Schema 
targetSchema) {
-    return isProjectionOfInternal(sourceSchema, targetSchema, 
AvroSchemaUtils::isAtomicTypeEquals);
+    return isProjectionOfInternal(sourceSchema, targetSchema, 
AvroSchemaUtils::isAtomicTypeProjectable);
   }
 
-  private static boolean isAtomicTypeEquals(Schema source, Schema target) {
+  private static boolean isAtomicTypeProjectable(Schema source, Schema target) 
{
+    // ignore nullability for projectable checking
+    source = resolveNullableSchema(source);
+    target = resolveNullableSchema(target);
+    if (source.getType() == Schema.Type.ENUM && target.getType() == 
Schema.Type.STRING
+        || source.getType() == Schema.Type.STRING && target.getType() == 
Schema.Type.ENUM) {
+      return true;
+    }
     // ignore name/namespace for FIXED type
     if (source.getType() == Schema.Type.FIXED && target.getType() == 
Schema.Type.FIXED) {
       return source.getLogicalType().equals(target.getLogicalType())
diff --git a/hudi-common/src/test/java/org/apache/hudi/avro/TestAvroSchemaUtils.java b/hudi-common/src/test/java/org/apache/hudi/avro/TestAvroSchemaUtils.java
index a4480225cab0..41b2d1cf1be8 100644
--- a/hudi-common/src/test/java/org/apache/hudi/avro/TestAvroSchemaUtils.java
+++ b/hudi-common/src/test/java/org/apache/hudi/avro/TestAvroSchemaUtils.java
@@ -25,6 +25,8 @@ import org.apache.hudi.exception.SchemaCompatibilityException;
 
 import org.apache.avro.LogicalTypes;
 import org.apache.avro.Schema;
+import org.apache.parquet.avro.AvroSchemaConverter;
+import org.apache.parquet.schema.MessageType;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.ValueSource;
@@ -65,7 +67,23 @@ public class TestAvroSchemaUtils {
       + "           \"scale\" : 2\n"
       + "           }],\n"
       + "       \"default\" : null\n"
-      + "      },\n"
+      + "    },\n"
+      + "    {\n"
+      + "         \"name\" : \"arrayInt\",\n"
+      + "          \"type\" : [ \"null\", {\n"
+      + "            \"type\" : \"array\",\n"
+      + "            \"items\" : [ \"null\", \"int\" ]\n"
+      + "           } ],\n"
+      + "          \"default\" : null\n"
+      + "    },\n"
+      + "    {\n"
+      + "         \"name\" : \"mapStrInt\",\n"
+      + "         \"type\" : [ \"null\", {\n"
+      + "           \"type\" : \"map\",\n"
+      + "           \"values\" : [ \"null\", \"int\" ]\n"
+      + "         } ],\n"
+      + "         \"default\" : null\n"
+      + "    },\n"
       + "    {\n"
       + "      \"name\": \"nested_record\",\n"
       + "      \"type\": {\n"
@@ -82,7 +100,16 @@ public class TestAvroSchemaUtils {
       + "          }\n"
       + "        ]\n"
       + "      }\n"
-      + "    }\n"
+      + "    },\n"
+      + "    { \n"
+      + "      \"name\" : \"f_enum\",\n"
+      + "      \"type\" : [ \"null\", {\n"
+      + "        \"type\" : \"enum\",\n"
+      + "        \"name\" : \"Visibility\",\n"
+      + "        \"namespace\" : \"common.Types\",\n"
+      + "        \"symbols\" : [ \"UNKNOWN\", \"PUBLIC\", \"PRIVATE\", 
\"SHARED\" ]\n"
+      + "         }]\n"
+      + "   }\n"
       + "  ]\n"
       + "}\n";
 
@@ -219,6 +246,13 @@ public class TestAvroSchemaUtils {
         AvroSchemaUtils.isStrictProjectionOf(
             Schema.createUnion(Schema.create(Schema.Type.NULL), sourceSchema),
             Schema.createUnion(Schema.create(Schema.Type.NULL), 
projectedNestedSchema)));
+
+    // Case #5: Validate project with field nullability changed
+    // Note: for array type, the nullability of element's type will be changed after conversion:
+    // AvroSchemaConverter: Avro Schema -> Parquet MessageType -> Avro Schema
+    MessageType messageType = new AvroSchemaConverter().convert(sourceSchema);
+    Schema converted = new AvroSchemaConverter().convert(messageType);
+    assertTrue(AvroSchemaUtils.isStrictProjectionOf(sourceSchema, converted));
   }
 
   @Test

Reply via email to