This is an automated email from the ASF dual-hosted git repository. yihua pushed a commit to branch branch-0.x in repository https://gitbox.apache.org/repos/asf/hudi.git
commit a233bbecb3ac9fe6f4fd5732857f87df435b8e1e
Author: Danny Chan <[email protected]>
AuthorDate: Wed Apr 17 14:37:28 2024 +0800

    [HUDI-7625] Avoid unnecessary rewrite for metadata table (#11038)
---
 .../src/main/java/org/apache/hudi/io/HoodieMergeHandle.java | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java
index 749b08c3e7e..3f9aa2981c1 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java
@@ -332,7 +332,11 @@ public class HoodieMergeHandle<T, I, K, O> extends HoodieWriteHandle<T, I, K, O>
    * Go through an old record. Here if we detect a newer version shows up, we write the new one to the file.
    */
   public void write(HoodieRecord<T> oldRecord) {
-    Schema oldSchema = config.populateMetaFields() ? writeSchemaWithMetaFields : writeSchema;
+    // Use schema with metadata files no matter whether 'hoodie.populate.meta.fields' is enabled
+    // to avoid unnecessary rewrite. Even with metadata table(whereas the option 'hoodie.populate.meta.fields' is configured as false),
+    // the record is deserialized with schema including metadata fields,
+    // see HoodieMergeHelper#runMerge for more details.
+    Schema oldSchema = writeSchemaWithMetaFields;
     Schema newSchema = preserveMetadata ? writeSchemaWithMetaFields : writeSchema;
     boolean copyOldRecord = true;
     String key = oldRecord.getRecordKey(oldSchema, keyGeneratorOpt);
