This is an automated email from the ASF dual-hosted git repository.
sivabalan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 38db88c8a2b [HUDI-7160] Copy over schema properties when adding Hudi
Metadata fields (#10212)
38db88c8a2b is described below
commit 38db88c8a2bb0c378295324692c4c0388e60e466
Author: Tim Brown <[email protected]>
AuthorDate: Wed Nov 29 22:54:12 2023 -0600
[HUDI-7160] Copy over schema properties when adding Hudi Metadata fields
(#10212)
---
.../java/org/apache/hudi/avro/HoodieAvroUtils.java | 3 +++
.../org/apache/hudi/avro/TestHoodieAvroUtils.java | 25 ++++++++++++++++++++++
2 files changed, 28 insertions(+)
diff --git
a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java
b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java
index 3800d9c1053..ac7dcd42979 100644
--- a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java
+++ b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java
@@ -302,6 +302,9 @@ public class HoodieAvroUtils {
}
Schema mergedSchema = Schema.createRecord(schema.getName(),
schema.getDoc(), schema.getNamespace(), false);
+ for (Map.Entry<String, Object> prop : schema.getObjectProps().entrySet()) {
+ mergedSchema.addProp(prop.getKey(), prop.getValue());
+ }
mergedSchema.setFields(parentFields);
return mergedSchema;
}
diff --git
a/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java
b/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java
index 28b05435244..eb20081475f 100644
--- a/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java
+++ b/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java
@@ -99,6 +99,12 @@ public class TestHoodieAvroUtils {
+ "{\"name\": \"non_pii_col\", \"type\": \"string\"},"
+ "{\"name\": \"pii_col\", \"type\": \"string\", \"column_category\":
\"user_profile\"}]}";
+ private static final String EXAMPLE_SCHEMA_WITH_PROPS = "{\"type\":
\"record\",\"name\": \"testrec\",\"fields\": [ "
+ + "{\"name\": \"timestamp\",\"type\": \"double\",
\"custom_field_property\":\"value\"},{\"name\": \"_row_key\", \"type\":
\"string\"},"
+ + "{\"name\": \"non_pii_col\", \"type\": \"string\"},"
+ + "{\"name\": \"pii_col\", \"type\": \"string\", \"column_category\":
\"user_profile\"}], "
+ + "\"custom_schema_property\": \"custom_schema_property_value\"}";
+
private static int NUM_FIELDS_IN_EXAMPLE_SCHEMA = 4;
private static String SCHEMA_WITH_METADATA_FIELD = "{\"type\":
\"record\",\"name\": \"testrec2\",\"fields\": [ "
@@ -604,4 +610,23 @@ public class TestHoodieAvroUtils {
.subtract((BigDecimal)
unwrapAvroValueWrapper(wrapperValue)).toPlainString());
}
}
+
+ @Test
+ public void testAddMetadataFields() {
+ Schema baseSchema = new Schema.Parser().parse(EXAMPLE_SCHEMA_WITH_PROPS);
+ Schema schemaWithMetadata = HoodieAvroUtils.addMetadataFields(baseSchema);
+ List<Schema.Field> updatedFields = schemaWithMetadata.getFields();
+ // the five Hudi metadata fields must occupy positions 0-4, in exactly this order
+ assertEquals(HoodieRecord.COMMIT_TIME_METADATA_FIELD,
updatedFields.get(0).name());
+ assertEquals(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD,
updatedFields.get(1).name());
+ assertEquals(HoodieRecord.RECORD_KEY_METADATA_FIELD,
updatedFields.get(2).name());
+ assertEquals(HoodieRecord.PARTITION_PATH_METADATA_FIELD,
updatedFields.get(3).name());
+ assertEquals(HoodieRecord.FILENAME_METADATA_FIELD,
updatedFields.get(4).name());
+ // everything from index 5 onward must equal the base schema's field list, in the same order
+ List<Schema.Field> originalFieldsInUpdatedSchema =
updatedFields.subList(5, updatedFields.size());
+ assertEquals(baseSchema.getFields(), originalFieldsInUpdatedSchema);
+ // the record-level custom property and the field-level custom property must both be present on the result
+ assertEquals("custom_schema_property_value",
schemaWithMetadata.getProp("custom_schema_property"));
+ assertEquals("value",
originalFieldsInUpdatedSchema.get(0).getProp("custom_field_property"));
+ }
}