This is an automated email from the ASF dual-hosted git repository.
yihua pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new d3241136827 [HUDI-6855] Exclude .hoodie_partition_metadata file in
base file group (#9707)
d3241136827 is described below
commit d3241136827a74d39238aaa91cda4258e38169f4
Author: Wechar Yu <[email protected]>
AuthorDate: Thu Sep 14 10:01:58 2023 +0800
[HUDI-6855] Exclude .hoodie_partition_metadata file in base file group
(#9707)
Before this commit, Hudi built file groups for
.hoodie_partition_metadata.parquet, resolving ".hoodie" as the fileId and
"metadata" as the commit time. Although these partition metadata file groups
are filtered out later when comparing commit times, this commit simplifies
things by excluding them while building file groups.
---
.../table/view/AbstractTableFileSystemView.java | 11 +++++++--
.../table/view/TestHoodieTableFileSystemView.java | 28 ++++++++++++++++++++++
2 files changed, 37 insertions(+), 2 deletions(-)
diff --git
a/hudi-common/src/main/java/org/apache/hudi/common/table/view/AbstractTableFileSystemView.java
b/hudi-common/src/main/java/org/apache/hudi/common/table/view/AbstractTableFileSystemView.java
index 0910971e6b7..23b17b916c0 100644
---
a/hudi-common/src/main/java/org/apache/hudi/common/table/view/AbstractTableFileSystemView.java
+++
b/hudi-common/src/main/java/org/apache/hudi/common/table/view/AbstractTableFileSystemView.java
@@ -28,6 +28,7 @@ import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieFileGroup;
import org.apache.hudi.common.model.HoodieFileGroupId;
import org.apache.hudi.common.model.HoodieLogFile;
+import org.apache.hudi.common.model.HoodiePartitionMetadata;
import org.apache.hudi.common.model.HoodieReplaceCommitMetadata;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
@@ -478,8 +479,14 @@ public abstract class AbstractTableFileSystemView
implements SyncableFileSystemV
* @param statuses List of File-Status
*/
private Stream<HoodieBaseFile> convertFileStatusesToBaseFiles(FileStatus[]
statuses) {
- Predicate<FileStatus> roFilePredicate = fileStatus ->
fileStatus.getPath().getName()
-
.contains(metaClient.getTableConfig().getBaseFileFormat().getFileExtension());
+ Predicate<FileStatus> roFilePredicate = fileStatus -> {
+ String pathName = fileStatus.getPath().getName();
+ // Filter base files if:
+ // 1. file extension equals to table configured file extension
+ // 2. file is not .hoodie_partition_metadata
+ return
pathName.contains(metaClient.getTableConfig().getBaseFileFormat().getFileExtension())
+ &&
!pathName.startsWith(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE_PREFIX);
+ };
return
Arrays.stream(statuses).filter(roFilePredicate).map(HoodieBaseFile::new);
}
diff --git
a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java
b/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java
index 695f4fc03b3..4c895240ba7 100644
---
a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java
+++
b/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java
@@ -87,6 +87,7 @@ import java.util.UUID;
import java.util.stream.Collectors;
import java.util.stream.Stream;
+import static
org.apache.hudi.common.model.HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE_PREFIX;
import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes;
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -292,6 +293,33 @@ public class TestHoodieTableFileSystemView extends
HoodieCommonTestHarness {
testViewForFileSlicesWithAsyncCompaction(false, true, 2, 2, true,
testBootstrap);
}
+ @Test
+ public void testViewForFileSlicesWithPartitionMetadataFile() throws
Exception {
+ String partitionPath = "2023/09/13";
+ new File(basePath + "/" + partitionPath).mkdirs();
+ new File(basePath + "/" + partitionPath + "/" +
HOODIE_PARTITION_METAFILE_PREFIX + ".parquet").mkdirs();
+
+ // create 2 fileId in partition
+ String fileId1 = UUID.randomUUID().toString();
+ String fileId2 = UUID.randomUUID().toString();
+ String commitTime1 = "1";
+ String fileName1 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN,
fileId1);
+ String fileName2 = FSUtils.makeBaseFileName(commitTime1, TEST_WRITE_TOKEN,
fileId2);
+ new File(basePath + "/" + partitionPath + "/" + fileName1).createNewFile();
+ new File(basePath + "/" + partitionPath + "/" + fileName2).createNewFile();
+ HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
+
+ HoodieInstant instant1 = new HoodieInstant(true,
HoodieTimeline.COMMIT_ACTION, commitTime1);
+ saveAsComplete(commitTimeline, instant1, Option.empty());
+ refreshFsView();
+
+ List<FileSlice> fileSlices =
fsView.getLatestFileSlices(partitionPath).collect(Collectors.toList());
+ assertEquals(2, fileSlices.size());
+ FileSlice fileSlice = fileSlices.get(0);
+ assertEquals(commitTime1, fileSlice.getBaseInstantTime());
+ assertEquals(2, fsView.getAllFileGroups(partitionPath).count());
+ }
+
@Test
protected void testInvalidLogFiles() throws Exception {
String partitionPath = "2016/05/01";