yihua commented on a change in pull request #4878:
URL: https://github.com/apache/hudi/pull/4878#discussion_r816168666



##########
File path: hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java
##########
@@ -438,27 +481,62 @@ private void 
validateLatestBaseFiles(HoodieTableFileSystemView metaFsView, Hoodi
   /**
    * Compare getLatestFileSlices between metadata table and fileSystem.
    */
-  private void validateLatestFileSlices(HoodieTableFileSystemView metaFsView, 
HoodieTableFileSystemView fsView, String partitionPath) {
+  private void validateLatestFileSlices(
+      HoodieMetadataValidationContext metadataTableBasedContext,
+      HoodieMetadataValidationContext fsBasedContext, String partitionPath) {
 
-    List<FileSlice> latestFileSlicesFromMetadataTable = 
metaFsView.getLatestFileSlices(partitionPath).sorted(new 
FileSliceCompactor()).collect(Collectors.toList());
-    List<FileSlice> latestFileSlicesFromFS = 
fsView.getLatestFileSlices(partitionPath).sorted(new 
FileSliceCompactor()).collect(Collectors.toList());
+    List<FileSlice> latestFileSlicesFromMetadataTable = 
metadataTableBasedContext.getSortedLatestFileSliceList(partitionPath);
+    List<FileSlice> latestFileSlicesFromFS = 
fsBasedContext.getSortedLatestFileSliceList(partitionPath);
 
-    LOG.info("Latest file list from metadata: " + 
latestFileSlicesFromMetadataTable + ". For partition " + partitionPath);
-    LOG.info("Latest file list from direct listing: " + latestFileSlicesFromFS 
+ ". For partition " + partitionPath);
+    LOG.debug("Latest file list from metadata: " + 
latestFileSlicesFromMetadataTable + ". For partition " + partitionPath);
+    LOG.debug("Latest file list from direct listing: " + 
latestFileSlicesFromFS + ". For partition " + partitionPath);
 
-    validateFileSlice(latestFileSlicesFromMetadataTable, 
latestFileSlicesFromFS, partitionPath);
+    validate(latestFileSlicesFromMetadataTable, latestFileSlicesFromFS, 
partitionPath, "file slices");
     LOG.info("Validation of getLatestFileSlices succeeded for partition " + 
partitionPath);
   }
 
-  private HoodieTableFileSystemView 
createHoodieTableFileSystemView(HoodieSparkEngineContext engineContext, boolean 
enableMetadataTable) {
+  private void validateAllColumnStats(
+      HoodieMetadataValidationContext metadataTableBasedContext,
+      HoodieMetadataValidationContext fsBasedContext, String partitionPath) {
+    List<String> latestBaseFilenameList = 
fsBasedContext.getSortedLatestBaseFileList(partitionPath)
+        .stream().map(BaseFile::getFileName).collect(Collectors.toList());
+    List<HoodieColumnRangeMetadata<String>> metadataBasedColStats = 
metadataTableBasedContext
+        .getSortedColumnStatsList(partitionPath, latestBaseFilenameList);
+    List<HoodieColumnRangeMetadata<String>> fsBasedColStats = fsBasedContext
+        .getSortedColumnStatsList(partitionPath, latestBaseFilenameList);
 
-    HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder()
-        .enable(enableMetadataTable)
-        .withAssumeDatePartitioning(cfg.assumeDatePartitioning)
-        .build();
+    validate(metadataBasedColStats, fsBasedColStats, partitionPath, "column 
stats");
 
-    return FileSystemViewManager.createInMemoryFileSystemView(engineContext,
-        metaClient, metadataConfig);
+    LOG.info("Validation of column stats succeeded for partition " + 
partitionPath);
+  }
+
+  private void validateBloomFilters(
+      HoodieMetadataValidationContext metadataTableBasedContext,
+      HoodieMetadataValidationContext fsBasedContext, String partitionPath) {
+    List<String> latestBaseFilenameList = 
fsBasedContext.getSortedLatestBaseFileList(partitionPath)
+        .stream().map(BaseFile::getFileName).collect(Collectors.toList());
+    List<BloomFilterData> metadataBasedBloomFilters = metadataTableBasedContext

Review comment:
       Thanks for raising this.  The same reasoning as above.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to