nsivabalan commented on a change in pull request #4114:
URL: https://github.com/apache/hudi/pull/4114#discussion_r757659563
##########
File path:
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
##########
@@ -769,8 +773,12 @@ public DirectoryInfo(String relativePath, FileStatus[] fileStatus) {
       } else if (status.getPath().getName().equals(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE)) {
         // Presence of partition meta file implies this is a HUDI partition
         this.isHoodiePartition = true;
-      } else if (FSUtils.isDataFile(status.getPath())) {
-        // Regular HUDI data file (base file or log file)
+      } else if (FSUtils.isBaseFile(status.getPath())) {
+        final String commitTime = FSUtils.getCommitTime(status.getPath().toString());
+        if (completedInstantsTimeline.containsOrBeforeTimelineStarts(commitTime)) {
+          filenameToSizeMap.put(status.getPath().getName(), status.getLen());
+        }
Review comment:
Can you leave a comment here explaining why we do this?
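For illustration, the requested comment might read something like the sketch below. The wording is my assumption, not the author's; the surrounding code is taken verbatim from the diff above:

    } else if (FSUtils.isBaseFile(status.getPath())) {
      // Only track base files whose commit has completed on the dataset
      // timeline. Skipping files from inflight or failed commits keeps
      // uncommitted data out of the metadata table.
      final String commitTime = FSUtils.getCommitTime(status.getPath().toString());
      if (completedInstantsTimeline.containsOrBeforeTimelineStarts(commitTime)) {
        filenameToSizeMap.put(status.getPath().getName(), status.getLen());
      }
    }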
##########
File path:
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
##########
@@ -754,12 +758,12 @@ protected void bootstrapCommit(List<DirectoryInfo> partitionInfoList, String cre
     // Is this a hoodie partition
     private boolean isHoodiePartition = false;
 
-    public DirectoryInfo(String relativePath, FileStatus[] fileStatus) {
+    public DirectoryInfo(final HoodieTableMetaClient datasetMetaClient, String relativePath, FileStatus[] fileStatus) {
       this.relativePath = relativePath;
 
       // Pre-allocate with the maximum length possible
       filenameToSizeMap = new HashMap<>(fileStatus.length);
-
+      final HoodieTimeline completedInstantsTimeline = datasetMetaClient.getActiveTimeline().filterCompletedInstants();
Review comment:
We can do the filtering outside and pass the result in here, instead of re-filtering the timeline inside each DirectoryInfo.
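A minimal sketch of that refactor, assuming the constructor signature is changed to accept the pre-filtered timeline (the exact signature is illustrative, not from the PR):

    // Caller filters the timeline once and reuses it for every directory:
    final HoodieTimeline completedInstantsTimeline =
        datasetMetaClient.getActiveTimeline().filterCompletedInstants();
    DirectoryInfo dirInfo = new DirectoryInfo(relativePath, fileStatus, completedInstantsTimeline);

    // DirectoryInfo then no longer needs the meta client at all:
    public DirectoryInfo(String relativePath, FileStatus[] fileStatus,
                         HoodieTimeline completedInstantsTimeline) {
      this.relativePath = relativePath;
      // Pre-allocate with the maximum length possible
      filenameToSizeMap = new HashMap<>(fileStatus.length);
      // ... use completedInstantsTimeline when deciding which base files to track ...
    }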
##########
File path:
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestHoodieClientMultiWriter.java
##########
@@ -351,10 +352,10 @@ public void testHoodieClientMultiWriterWithClustering(HoodieTableType tableType)
         numRecords, 200, 2);
     client2.commit(newCommitTime, result2);
     // Schedule and run clustering while previous writer for commit 003 is running
-    SparkRDDWriteClient client3 = getHoodieWriteClient(cfg);
+    SparkRDDWriteClient client3 = getHoodieWriteClient(cfg3);
     // schedule clustering
     Option<String> clusterInstant = client3.scheduleTableService(Option.empty(), TableServiceType.CLUSTER);
-    assertFalse(clusterInstant.isPresent());
+    assertTrue(clusterInstant.isPresent());
Review comment:
You might have conflicts here when you rebase; just watch out.