This is an automated email from the ASF dual-hosted git repository.
danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new d4d719d45dfc fix: Ignore field nullability while checking whether
record should be rewritten in COW write path (#14094)
d4d719d45dfc is described below
commit d4d719d45dfc7e55870590e03a53281b246d755b
Author: Shuo Cheng <[email protected]>
AuthorDate: Sat Oct 18 16:54:55 2025 +0800
fix: Ignore field nullability while checking whether record should be
rewritten in COW write path (#14094)
---
.../java/org/apache/hudi/avro/AvroSchemaUtils.java | 11 +++++--
.../org/apache/hudi/avro/TestAvroSchemaUtils.java | 38 ++++++++++++++++++++--
2 files changed, 45 insertions(+), 4 deletions(-)
diff --git
a/hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaUtils.java
b/hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaUtils.java
index 33f8ce9bf814..4e51e43ebde1 100644
--- a/hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaUtils.java
+++ b/hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaUtils.java
@@ -175,10 +175,17 @@ public class AvroSchemaUtils {
* </ol>
*/
public static boolean isStrictProjectionOf(Schema sourceSchema, Schema
targetSchema) {
- return isProjectionOfInternal(sourceSchema, targetSchema,
AvroSchemaUtils::isAtomicTypeEquals);
+ return isProjectionOfInternal(sourceSchema, targetSchema,
AvroSchemaUtils::isAtomicTypeProjectable);
}
- private static boolean isAtomicTypeEquals(Schema source, Schema target) {
+ private static boolean isAtomicTypeProjectable(Schema source, Schema target)
{
+ // Ignore nullability when checking whether the type is projectable.
+ source = resolveNullableSchema(source);
+ target = resolveNullableSchema(target);
+ if (source.getType() == Schema.Type.ENUM && target.getType() ==
Schema.Type.STRING
+ || source.getType() == Schema.Type.STRING && target.getType() ==
Schema.Type.ENUM) {
+ return true;
+ }
// ignore name/namespace for FIXED type
if (source.getType() == Schema.Type.FIXED && target.getType() ==
Schema.Type.FIXED) {
return source.getLogicalType().equals(target.getLogicalType())
diff --git
a/hudi-common/src/test/java/org/apache/hudi/avro/TestAvroSchemaUtils.java
b/hudi-common/src/test/java/org/apache/hudi/avro/TestAvroSchemaUtils.java
index a4480225cab0..41b2d1cf1be8 100644
--- a/hudi-common/src/test/java/org/apache/hudi/avro/TestAvroSchemaUtils.java
+++ b/hudi-common/src/test/java/org/apache/hudi/avro/TestAvroSchemaUtils.java
@@ -25,6 +25,8 @@ import org.apache.hudi.exception.SchemaCompatibilityException;
import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;
+import org.apache.parquet.avro.AvroSchemaConverter;
+import org.apache.parquet.schema.MessageType;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
@@ -65,7 +67,23 @@ public class TestAvroSchemaUtils {
+ " \"scale\" : 2\n"
+ " }],\n"
+ " \"default\" : null\n"
- + " },\n"
+ + " },\n"
+ + " {\n"
+ + " \"name\" : \"arrayInt\",\n"
+ + " \"type\" : [ \"null\", {\n"
+ + " \"type\" : \"array\",\n"
+ + " \"items\" : [ \"null\", \"int\" ]\n"
+ + " } ],\n"
+ + " \"default\" : null\n"
+ + " },\n"
+ + " {\n"
+ + " \"name\" : \"mapStrInt\",\n"
+ + " \"type\" : [ \"null\", {\n"
+ + " \"type\" : \"map\",\n"
+ + " \"values\" : [ \"null\", \"int\" ]\n"
+ + " } ],\n"
+ + " \"default\" : null\n"
+ + " },\n"
+ " {\n"
+ " \"name\": \"nested_record\",\n"
+ " \"type\": {\n"
@@ -82,7 +100,16 @@ public class TestAvroSchemaUtils {
+ " }\n"
+ " ]\n"
+ " }\n"
- + " }\n"
+ + " },\n"
+ + " { \n"
+ + " \"name\" : \"f_enum\",\n"
+ + " \"type\" : [ \"null\", {\n"
+ + " \"type\" : \"enum\",\n"
+ + " \"name\" : \"Visibility\",\n"
+ + " \"namespace\" : \"common.Types\",\n"
+ + " \"symbols\" : [ \"UNKNOWN\", \"PUBLIC\", \"PRIVATE\",
\"SHARED\" ]\n"
+ + " }]\n"
+ + " }\n"
+ " ]\n"
+ "}\n";
@@ -219,6 +246,13 @@ public class TestAvroSchemaUtils {
AvroSchemaUtils.isStrictProjectionOf(
Schema.createUnion(Schema.create(Schema.Type.NULL), sourceSchema),
Schema.createUnion(Schema.create(Schema.Type.NULL),
projectedNestedSchema)));
+
+ // Case #5: Validate projection when field nullability has changed.
+ // Note: for array types, the nullability of the element type changes
after conversion:
+ // AvroSchemaConverter: Avro Schema -> Parquet MessageType -> Avro Schema
+ MessageType messageType = new AvroSchemaConverter().convert(sourceSchema);
+ Schema converted = new AvroSchemaConverter().convert(messageType);
+ assertTrue(AvroSchemaUtils.isStrictProjectionOf(sourceSchema, converted));
}
@Test