This is an automated email from the ASF dual-hosted git repository. shoothzj pushed a commit to branch branch-4.17 in repository https://gitbox.apache.org/repos/asf/bookkeeper.git
commit e67685b357a47dbc18f3a2de1f7da073770ef0d8 Author: ZhangJian He <[email protected]> AuthorDate: Sat Apr 20 08:13:58 2024 +0800 improve: change scheduleAtFixedRate to scheduleWithFixedDelay in GarbageCollectorThread (#4296) ### Motivation The current scheduling mechanism for garbage collection uses scheduleAtFixedRate. This approach schedules the next execution without considering whether the current task has finished, so if a task takes longer than the configured period, the delayed executions pile up and then run back-to-back (they never run concurrently). In my test environment, after tasks accumulate in the gc thread pool, sometimes there is no entrylog to extract and no entrylogger to compact. But every round of gc still needs to compare ledger metadata between the local store and the metadata store (zk), which results in very frequent access to the metadata store, and each access brings considerable unnecessary data flow. See https://lists.apache.org/thread/023vkc5rwyq0j776zcv8dtp7c8cml6vp Signed-off-by: ZhangJian He <[email protected]> (cherry picked from commit f62a81dda4f1f782a3fc50067bbdffd5b6f438df) --- .../java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java | 6 +++--- conf/bk_server.conf | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java index 9839e49c65..14a2210004 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java @@ -62,12 +62,12 @@ public class GarbageCollectorThread implements Runnable { private static final long MINUTE = TimeUnit.MINUTES.toMillis(1); // Maps entry log files to the set of ledgers that comprise the file and the size usage per ledger - private EntryLogMetadataMap entryLogMetaMap; + private final EntryLogMetadataMap entryLogMetaMap; private final 
ScheduledExecutorService gcExecutor; Future<?> scheduledFuture = null; - // This is how often we want to run the Garbage Collector Thread (in milliseconds). + // This is the fixed delay in milliseconds before running the Garbage Collector Thread again. final long gcWaitTime; // Compaction parameters @@ -374,7 +374,7 @@ public class GarbageCollectorThread implements Runnable { scheduledFuture.cancel(false); } long initialDelay = getModInitialDelay(); - scheduledFuture = gcExecutor.scheduleAtFixedRate(this, initialDelay, gcWaitTime, TimeUnit.MILLISECONDS); + scheduledFuture = gcExecutor.scheduleWithFixedDelay(this, initialDelay, gcWaitTime, TimeUnit.MILLISECONDS); } /** diff --git a/conf/bk_server.conf b/conf/bk_server.conf index a36a2fbf97..175dca7334 100755 --- a/conf/bk_server.conf +++ b/conf/bk_server.conf @@ -590,7 +590,7 @@ ledgerDirectories=/tmp/bk-data ## Garbage collection settings ############################################################################# -# How long the interval to trigger next garbage collection, in milliseconds +# Fixed delay in milliseconds to trigger the next garbage collection # Since garbage collection is running in background, too frequent gc # will heart performance. It is better to give a higher number of gc # interval if there is enough disk capacity.
