This is an automated email from the ASF dual-hosted git repository. lhotari pushed a commit to branch branch-4.15 in repository https://gitbox.apache.org/repos/asf/bookkeeper.git
commit 273464108121a1bf1b2d9cdcc30f4c55003c0467 Author: ZhangJian He <shoot...@gmail.com> AuthorDate: Sat Apr 20 08:13:58 2024 +0800 improve: change scheduleAtFixedRate to scheduleWithFixedDelay in GarbageCollectorThread (#4296) ### Motivation The current scheduling mechanism for garbage collection uses scheduleAtFixedRate. This approach schedules the next execution without considering whether the current task has finished, potentially leading to back-to-back executions with no idle time in between if a task takes longer than the configured period (delayed runs start late and pile up; they do not run concurrently). In my test environment, after tasks accumulate in the gc thread pool, sometimes there is no entrylog to extract and no entrylogger to compact. But every round of gc still needs to compare ledger meta between the local store and the metadata store (zk), which results in highly frequent access to the metadata store, and each access brings considerable unnecessary data flow. See https://lists.apache.org/thread/023vkc5rwyq0j776zcv8dtp7c8cml6vp Signed-off-by: ZhangJian He <shoot...@gmail.com> (cherry picked from commit f62a81dda4f1f782a3fc50067bbdffd5b6f438df) --- .../java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java | 6 +++--- conf/bk_server.conf | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java index 2a0d794b5b..9aaac742f9 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java @@ -65,12 +65,12 @@ public class GarbageCollectorThread extends SafeRunnable { private static final long MINUTE = TimeUnit.MINUTES.toMillis(1); // Maps entry log files to the set of ledgers that comprise the file and the size usage per ledger - private EntryLogMetadataMap entryLogMetaMap; + private final EntryLogMetadataMap entryLogMetaMap; private final 
ScheduledExecutorService gcExecutor; Future<?> scheduledFuture = null; - // This is how often we want to run the Garbage Collector Thread (in milliseconds). + // This is the fixed delay in milliseconds before running the Garbage Collector Thread again. final long gcWaitTime; // Compaction parameters @@ -365,7 +365,7 @@ public class GarbageCollectorThread extends SafeRunnable { scheduledFuture.cancel(false); } long initialDelay = getModInitialDelay(); - scheduledFuture = gcExecutor.scheduleAtFixedRate(this, initialDelay, gcWaitTime, TimeUnit.MILLISECONDS); + scheduledFuture = gcExecutor.scheduleWithFixedDelay(this, initialDelay, gcWaitTime, TimeUnit.MILLISECONDS); } /** diff --git a/conf/bk_server.conf b/conf/bk_server.conf index 409613c996..66591d861d 100755 --- a/conf/bk_server.conf +++ b/conf/bk_server.conf @@ -587,7 +587,7 @@ ledgerDirectories=/tmp/bk-data ## Garbage collection settings ############################################################################# -# How long the interval to trigger next garbage collection, in milliseconds +# Fixed delay in milliseconds to trigger the next garbage collection # Since garbage collection is running in background, too frequent gc # will heart performance. It is better to give a higher number of gc # interval if there is enough disk capacity.