ArafatKhan2198 commented on code in PR #9243:
URL: https://github.com/apache/ozone/pull/9243#discussion_r2509102850
##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/FileSizeCountTaskHelper.java:
##########
@@ -134,31 +152,40 @@ public static boolean reprocessBucketLayout(BucketLayout
bucketLayout,
OMMetadataManager
omMetadataManager,
Map<FileSizeCountKey, Long>
fileSizeCountMap,
ReconFileMetadataManager
reconFileMetadataManager,
- String taskName) {
+ String taskName,
+ int maxIterators,
+ int maxWorkers,
+ int maxKeysInMemory) {
+ LOG.info("{}: Starting parallel iteration with {} iterators, {} workers
for bucket layout {}",
+ taskName, maxIterators, maxWorkers, bucketLayout);
Table<String, OmKeyInfo> omKeyInfoTable =
omMetadataManager.getKeyTable(bucketLayout);
- int totalKeysProcessed = 0;
+ long startTime = Time.monotonicNow();
- try (TableIterator<String, ? extends Table.KeyValue<String, OmKeyInfo>>
keyIter =
- omKeyInfoTable.iterator()) {
- while (keyIter.hasNext()) {
- Table.KeyValue<String, OmKeyInfo> kv = keyIter.next();
- handlePutKeyEvent(kv.getValue(), fileSizeCountMap);
- totalKeysProcessed++;
-
- // Flush to RocksDB periodically.
- if (fileSizeCountMap.size() >= 100000) {
- // For reprocess, we don't need to check existing values since table
was truncated
- LOG.debug("Flushing {} accumulated counts to RocksDB for {}",
fileSizeCountMap.size(), taskName);
- writeCountsToDB(fileSizeCountMap, reconFileMetadataManager);
- fileSizeCountMap.clear();
- }
- }
- } catch (IOException ioEx) {
- LOG.error("Unable to populate File Size Count for {} in RocksDB.",
taskName, ioEx);
+ // Use parallel table iteration
+ Function<Table.KeyValue<String, OmKeyInfo>, Void> kvOperation = kv -> {
+ handlePutKeyEvent(kv.getValue(), fileSizeCountMap);
+ return null;
+ };
+
+ try (ParallelTableIteratorOperation<String, OmKeyInfo> keyIter =
+ new ParallelTableIteratorOperation<>(omMetadataManager,
omKeyInfoTable,
+ StringCodec.get(), maxIterators, maxWorkers, maxKeysInMemory,
100000)) {
Review Comment:
I had removed it by mistake; I will add it back.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]