zhangyue19921010 commented on a change in pull request #4994:
URL: https://github.com/apache/hudi/pull/4994#discussion_r825659273



##########
File path: 
hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java
##########
@@ -345,16 +358,59 @@ public void doMetadataTableValidation() {
     boolean finalResult = true;
     metaClient.reloadActiveTimeline();
     String basePath = metaClient.getBasePath();
+    List<String> baseFilesUnderDeletion = Collections.emptyList();
+
+    if (cfg.skipUnderDeletionDataFiles) {
+      HoodieTimeline pendingCleaningTimeline = metaClient.getActiveTimeline()
+          .getCleanerTimeline()
+          .filter(instant -> instant.getState() != 
HoodieInstant.State.COMPLETED);
+
+      baseFilesUnderDeletion = 
pendingCleaningTimeline.getInstants().flatMap(instant -> {
+        try {
+          if (instant.isInflight()) {
+            // convert inflight instant to requested and get clean plan
+            instant = new HoodieInstant(HoodieInstant.State.REQUESTED, 
instant.getAction(), instant.getTimestamp());
+          }
+          HoodieCleanerPlan cleanerPlan = 
CleanerUtils.getCleanerPlan(metaClient, instant);
+
+          return 
cleanerPlan.getFilePathsToBeDeletedPerPartition().values().stream().flatMap(cleanerFIleInfoList
 -> {
+            return cleanerFIleInfoList.stream().map(fileInfo -> {
+              return new Path(fileInfo.getFilePath()).getName();
+            });
+          });
+
+        } catch (HoodieIOException ex) {
+
+          if (ex.getIOException() instanceof FileNotFoundException) {
+            // cleaner instant could be deleted by archive and 
FileNotFoundException could be threw during getInstantDetails function

Review comment:
       Actually no need to check FileNotFoundException here. removed.

##########
File path: 
hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java
##########
@@ -410,42 +466,64 @@ public void doMetadataTableValidation() {
    * @param metadataTableBasedContext Validation context containing 
information based on metadata table
    * @param fsBasedContext            Validation context containing 
information based on the file system
    * @param partitionPath             Partition path String
+   * @param baseFilesUnderDeletion    Base files under un-complete cleaner 
action
    */
   private void validateFilesInPartition(
       HoodieMetadataValidationContext metadataTableBasedContext,
-      HoodieMetadataValidationContext fsBasedContext, String partitionPath) {
+      HoodieMetadataValidationContext fsBasedContext, String partitionPath,
+      List<String> baseFilesUnderDeletion) {
     if (cfg.validateLatestFileSlices) {
-      validateLatestFileSlices(metadataTableBasedContext, fsBasedContext, 
partitionPath);
+      validateLatestFileSlices(metadataTableBasedContext, fsBasedContext, 
partitionPath, baseFilesUnderDeletion);
     }
 
     if (cfg.validateLatestBaseFiles) {
-      validateLatestBaseFiles(metadataTableBasedContext, fsBasedContext, 
partitionPath);
+      validateLatestBaseFiles(metadataTableBasedContext, fsBasedContext, 
partitionPath, baseFilesUnderDeletion);
     }
 
     if (cfg.validateAllFileGroups) {
-      validateAllFileGroups(metadataTableBasedContext, fsBasedContext, 
partitionPath);
+      validateAllFileGroups(metadataTableBasedContext, fsBasedContext, 
partitionPath, baseFilesUnderDeletion);
     }
 
     if (cfg.validateAllColumnStats) {
-      validateAllColumnStats(metadataTableBasedContext, fsBasedContext, 
partitionPath);
+      validateAllColumnStats(metadataTableBasedContext, fsBasedContext, 
partitionPath, baseFilesUnderDeletion);
     }
 
     if (cfg.validateBloomFilters) {
-      validateBloomFilters(metadataTableBasedContext, fsBasedContext, 
partitionPath);
+      validateBloomFilters(metadataTableBasedContext, fsBasedContext, 
partitionPath, baseFilesUnderDeletion);
     }
   }
 
   private void validateAllFileGroups(
       HoodieMetadataValidationContext metadataTableBasedContext,
-      HoodieMetadataValidationContext fsBasedContext, String partitionPath) {
-    List<FileSlice> allFileSlicesFromMeta = metadataTableBasedContext
-        .getSortedAllFileGroupList(partitionPath).stream()
-        .flatMap(HoodieFileGroup::getAllFileSlices).sorted(new 
FileSliceComparator())
-        .collect(Collectors.toList());
-    List<FileSlice> allFileSlicesFromFS = fsBasedContext
-        .getSortedAllFileGroupList(partitionPath).stream()
-        .flatMap(HoodieFileGroup::getAllFileSlices).sorted(new 
FileSliceComparator())
-        .collect(Collectors.toList());
+      HoodieMetadataValidationContext fsBasedContext,
+      String partitionPath,
+      List<String> baseFilesUnderDeletion) {

Review comment:
       chanegd.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to