GeorgeJahad commented on code in PR #4206:
URL: https://github.com/apache/ozone/pull/4206#discussion_r1103473693


##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/FileSizeCountTask.java:
##########
@@ -76,27 +76,44 @@ public FileSizeCountTask(FileCountBySizeDao fileCountBySizeDao,
    */
   @Override
   public Pair<String, Boolean> reprocess(OMMetadataManager omMetadataManager) {
-    Table<String, OmKeyInfo> omKeyInfoTable =
-        omMetadataManager.getKeyTable(getBucketLayout());
+    // Map to store the count of files based on file size
     Map<FileSizeCountKey, Long> fileSizeCountMap = new HashMap<>();
+
+    // Delete all records from FILE_COUNT_BY_SIZE table
+    int execute = dslContext.delete(FILE_COUNT_BY_SIZE).execute();
+    LOG.info("Deleted {} records from {}", execute, FILE_COUNT_BY_SIZE);
+
+    // Call reprocessBucket method for FILE_SYSTEM_OPTIMIZED bucket layout
+    reprocessBucket(BucketLayout.FILE_SYSTEM_OPTIMIZED, omMetadataManager,
+        fileSizeCountMap);
+    // Call reprocessBucket method for LEGACY bucket layout
+    reprocessBucket(BucketLayout.LEGACY, omMetadataManager, fileSizeCountMap);
+
+    writeCountsToDB(true, fileSizeCountMap);
+    LOG.info("Completed a 'reprocess' run of FileSizeCountTask.");
+    return new ImmutablePair<>(getTaskName(), true);
+  }
+
+  private void reprocessBucket(BucketLayout bucketLayout,
+                               OMMetadataManager omMetadataManager,
+                               Map<FileSizeCountKey, Long> fileSizeCountMap) {
+    Table<String, OmKeyInfo> omKeyInfoTable =
+        omMetadataManager.getKeyTable(bucketLayout);
     try (TableIterator<String, ? extends Table.KeyValue<String, OmKeyInfo>>
-        keyIter = omKeyInfoTable.iterator()) {
+             keyIter = omKeyInfoTable.iterator()) {
       while (keyIter.hasNext()) {
         Table.KeyValue<String, OmKeyInfo> kv = keyIter.next();
         handlePutKeyEvent(kv.getValue(), fileSizeCountMap);
+        //  The time complexity of .size() method is constant time, O(1)
+        if (fileSizeCountMap.size() >= 100000) {
+          writeCountsToDB(true, fileSizeCountMap);
+          fileSizeCountMap.clear();
+        }
       }
     } catch (IOException ioEx) {
-      LOG.error("Unable to populate File Size Count in Recon DB. ", ioEx);
-      return new ImmutablePair<>(getTaskName(), false);
+      LOG.error("Unable to populate File Size Count for " + bucketLayout +
+          " in Recon DB. ", ioEx);

Review Comment:
   So what happens to the exception now? Is it just being logged and ignored? The old code returned a failure result from this catch block.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to