alexeykudinkin commented on code in PR #5259:
URL: https://github.com/apache/hudi/pull/5259#discussion_r845663828


##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java:
##########
@@ -1051,28 +1043,31 @@ private void initialCommit(String createInstantTime, 
List<MetadataPartitionType>
       partitionToRecordsMap.put(MetadataPartitionType.COLUMN_STATS, 
recordsRDD);
     }
 
-    LOG.info("Committing " + partitions.size() + " partitions and " + 
totalFiles + " files to metadata");
+    LOG.info("Committing " + partitions.size() + " partitions and " + 
partitionToFilesMap.values().size() + " files to metadata");
+
     commit(createInstantTime, partitionToRecordsMap, false);
   }
 
   private HoodieData<HoodieRecord> getFilesPartitionRecords(String 
createInstantTime, List<DirectoryInfo> partitionInfoList, HoodieRecord 
allPartitionRecord) {
     HoodieData<HoodieRecord> filesPartitionRecords = 
engineContext.parallelize(Arrays.asList(allPartitionRecord), 1);
-    if (!partitionInfoList.isEmpty()) {
-      HoodieData<HoodieRecord> fileListRecords = 
engineContext.parallelize(partitionInfoList, 
partitionInfoList.size()).map(partitionInfo -> {
-        Map<String, Long> fileNameToSizeMap = 
partitionInfo.getFileNameToSizeMap();
-        // filter for files that are part of the completed commits
-        Map<String, Long> validFileNameToSizeMap = 
fileNameToSizeMap.entrySet().stream().filter(fileSizePair -> {
-          String commitTime = FSUtils.getCommitTime(fileSizePair.getKey());
-          return HoodieTimeline.compareTimestamps(commitTime, 
HoodieTimeline.LESSER_THAN_OR_EQUALS, createInstantTime);
-        }).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
-
-        // Record which saves files within a partition
-        return HoodieMetadataPayload.createPartitionFilesRecord(
-            
HoodieTableMetadataUtil.getPartition(partitionInfo.getRelativePath()), 
Option.of(validFileNameToSizeMap), Option.empty());
-      });
-      filesPartitionRecords = filesPartitionRecords.union(fileListRecords);
+    if (partitionInfoList.isEmpty()) {

Review Comment:
   Just inverted the conditional to simplify the control flow.



##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java:
##########
@@ -1012,28 +1011,21 @@ private void initialCommit(String createInstantTime, 
List<MetadataPartitionType>
     LOG.info("Initializing metadata table by using file listings in " + 
dataWriteConfig.getBasePath());
     engineContext.setJobStatus(this.getClass().getSimpleName(), "Initializing 
metadata table by listing files and partitions");
 
+    Map<MetadataPartitionType, HoodieData<HoodieRecord>> partitionToRecordsMap 
= new HashMap<>();
+
     List<DirectoryInfo> partitionInfoList = listAllPartitions(dataMetaClient);
-    List<String> partitions = new ArrayList<>();
-    AtomicLong totalFiles = new AtomicLong(0);
-    Map<String, Map<String, Long>> partitionToFilesMap = 
partitionInfoList.stream().map(p -> {
-      final String partitionName = 
HoodieTableMetadataUtil.getPartition(p.getRelativePath());
-      partitions.add(partitionName);
-      totalFiles.addAndGet(p.getTotalFiles());
-      return Pair.of(partitionName, p.getFileNameToSizeMap());
-    }).collect(Collectors.toMap(Pair::getKey, Pair::getValue));
-    final Map<MetadataPartitionType, HoodieData<HoodieRecord>> 
partitionToRecordsMap = new HashMap<>();
+    Map<String, Map<String, Long>> partitionToFilesMap = 
partitionInfoList.stream()
+        .map(p -> {
+          String partitionName = 
HoodieTableMetadataUtil.getPartitionIdentifier(p.getRelativePath());
+          return Pair.of(partitionName, p.getFileNameToSizeMap());
+        })
+        .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
+
+    List<String> partitions = new ArrayList<>(partitionToFilesMap.keySet());
 
     if (partitionTypes.contains(MetadataPartitionType.FILES)) {
       // Record which saves the list of all partitions
       HoodieRecord allPartitionRecord = 
HoodieMetadataPayload.createPartitionListRecord(partitions);
-      if (partitions.isEmpty()) {

Review Comment:
   This is pure duplication.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to