This is an automated email from the ASF dual-hosted git repository.
danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 205fabba9b6 [HUDI-8449] Fix deletion of record from FILES partition on
empty files list (#13384)
205fabba9b6 is described below
commit 205fabba9b6df5227c6a4b490dc57f2960b78044
Author: vamsikarnika <[email protected]>
AuthorDate: Fri Jun 27 06:27:22 2025 +0530
[HUDI-8449] Fix deletion of record from FILES partition on empty files list
(#13384)
* add a delete record for the {partition -> file list} meta entry of deleted
partitions;
* add a test to validate that the __all_partitions__ meta entry is also updated.
---------
Co-authored-by: Vamsi <[email protected]>
Co-authored-by: danny0405 <[email protected]>
---
.../org/apache/hudi/metadata/HoodieMetadataPayload.java | 13 ++++++++++++-
.../org/apache/hudi/metadata/HoodieTableMetadataUtil.java | 7 +++----
.../apache/hudi/functional/TestHoodieBackedMetadata.java | 6 ++++++
3 files changed, 21 insertions(+), 5 deletions(-)
diff --git
a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java
b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java
index caceb281de2..33d0f791441 100644
---
a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java
+++
b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java
@@ -292,14 +292,25 @@ public class HoodieMetadataPayload implements
HoodieRecordPayload<HoodieMetadata
public static HoodieRecord<HoodieMetadataPayload>
createPartitionFilesRecord(String partition,
Map<String, Long> filesAdded,
List<String> filesDeleted) {
+ return createPartitionFilesRecord(partition, filesAdded, filesDeleted,
false);
+ }
+
+ public static HoodieRecord<HoodieMetadataPayload>
createPartitionFilesRecord(String partition,
+
Map<String, Long> filesAdded,
+
List<String> filesDeleted,
+
boolean isPartitionDeleted) {
String partitionIdentifier =
getPartitionIdentifierForFilesPartition(partition);
+ HoodieKey key = new HoodieKey(partitionIdentifier,
MetadataPartitionType.FILES.getPartitionPath());
+ if (isPartitionDeleted) {
+ return new HoodieAvroRecord<>(key, new
HoodieMetadataPayload(Option.empty()));
+ }
+
int size = filesAdded.size() + filesDeleted.size();
Map<String, HoodieMetadataFileInfo> fileInfo = new HashMap<>(size, 1);
filesAdded.forEach((fileName, fileSize) -> fileInfo.put(fileName, new
HoodieMetadataFileInfo(fileSize, false)));
filesDeleted.forEach(fileName -> fileInfo.put(fileName,
DELETE_FILE_METADATA));
- HoodieKey key = new HoodieKey(partitionIdentifier,
MetadataPartitionType.FILES.getPartitionPath());
HoodieMetadataPayload payload = new
HoodieMetadataPayload(key.getRecordKey(),
MetadataPartitionType.FILES.getRecordType(), fileInfo);
return new HoodieAvroRecord<>(key, payload);
}
diff --git
a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
index 41cee569043..dcee9952eed 100644
---
a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
+++
b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
@@ -693,13 +693,12 @@ public class HoodieTableMetadataUtil {
int[] fileDeleteCount = {0};
List<String> deletedPartitions = new ArrayList<>();
cleanMetadata.getPartitionMetadata().forEach((partitionName,
partitionMetadata) -> {
+ boolean isPartitionDeleted = partitionMetadata.getIsPartitionDeleted();
// Files deleted from a partition
List<String> deletedFiles = partitionMetadata.getDeletePathPatterns();
- HoodieRecord record =
HoodieMetadataPayload.createPartitionFilesRecord(partitionName,
Collections.emptyMap(),
- deletedFiles);
- records.add(record);
+
records.add(HoodieMetadataPayload.createPartitionFilesRecord(partitionName,
Collections.emptyMap(),
+ deletedFiles, isPartitionDeleted));
fileDeleteCount[0] += deletedFiles.size();
- boolean isPartitionDeleted = partitionMetadata.getIsPartitionDeleted();
if (isPartitionDeleted) {
deletedPartitions.add(partitionName);
}
diff --git
a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieBackedMetadata.java
b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieBackedMetadata.java
index 79166960ba3..712c8ba7075 100644
---
a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieBackedMetadata.java
+++
b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieBackedMetadata.java
@@ -3037,6 +3037,12 @@ public class TestHoodieBackedMetadata extends
TestHoodieMetadataBase {
}
writeStatuses = client.upsert(jsc.parallelize(upsertRecords, 1),
newCommitTime);
assertTrue(client.commit(newCommitTime, writeStatuses));
+
+ // assert entry is not present for deleted partition in metadata table
+ HoodieTableMetadata tableMetadata = metadata(client, storage);
+
assertTrue(tableMetadata.getRecordsByKeyPrefixes(Collections.singletonList(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH),
FILES.getPartitionPath(), false).isEmpty());
+
assertTrue(tableMetadata.getAllPartitionPaths().contains(HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH));
+
assertFalse(tableMetadata.getAllPartitionPaths().contains(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH));
// above upsert would have triggered clean
validateMetadata(client);
assertEquals(1, metadata(client, storage).getAllPartitionPaths().size());