This is an automated email from the ASF dual-hosted git repository. yong pushed a commit to branch branch-4.15 in repository https://gitbox.apache.org/repos/asf/bookkeeper.git
commit 608f9f07b30bdbb20225d361010617624b65c6b0 Author: Hang Chen <[email protected]> AuthorDate: Thu Mar 16 11:28:48 2023 +0800 Add small files check in garbage collection (#3631) ### Motivation When we use `TransactionalEntryLogCompactor` to compact the entry log files, it generates a lot of small entry log files. The usage of those files is usually greater than 90%, so they cannot be compacted unless their usage decreases.  ### Changes We introduce an entry log file size check during compaction, controlled by `gcEntryLogSizeRatio`. If the total entry log file size is less than `gcEntryLogSizeRatio * logSizeLimit`, the entry log file will be compacted even if its usage is greater than 90%. This feature is disabled by default; the default value of `gcEntryLogSizeRatio` is `0.0`. (cherry picked from commit 2fad33bfcf24a72f7fdf103969ed4b0aa26778a2) --- .../org/apache/bookkeeper/bookie/GarbageCollectorThread.java | 11 ++++++++--- .../java/org/apache/bookkeeper/conf/ServerConfiguration.java | 10 ++++++++++ conf/bk_server.conf | 9 +++++++++ 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java index c88fa0bf4d..b1067aef36 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java @@ -557,15 +557,20 @@ public class GarbageCollectorThread extends SafeRunnable { MutableLong timeDiff = new MutableLong(0); entryLogMetaMap.forEach((entryLogId, meta) -> { - int bucketIndex = calculateUsageIndex(numBuckets, meta.getUsage()); + double usage = meta.getUsage(); + if (conf.isUseTargetEntryLogSizeForGc() && usage < 1.0d) { + usage = (double) meta.getRemainingSize() / Math.max(meta.getTotalSize(), 
conf.getEntryLogSizeLimit()); + } + int bucketIndex = calculateUsageIndex(numBuckets, usage); entryLogUsageBuckets[bucketIndex]++; if (timeDiff.getValue() < maxTimeMillis) { end.setValue(System.currentTimeMillis()); timeDiff.setValue(end.getValue() - start); } - if (meta.getUsage() >= threshold || (maxTimeMillis > 0 && timeDiff.getValue() >= maxTimeMillis) - || !running) { + if ((usage >= threshold + || (maxTimeMillis > 0 && timeDiff.getValue() >= maxTimeMillis) + || !running)) { // We allow the usage limit calculation to continue so that we get an accurate // report of where the usage was prior to running compaction. return; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java index 6327599808..c0e33d563b 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java @@ -119,6 +119,7 @@ public class ServerConfiguration extends AbstractConfiguration<ServerConfigurati protected static final String VERIFY_METADATA_ON_GC = "verifyMetadataOnGC"; protected static final String GC_ENTRYLOGMETADATA_CACHE_ENABLED = "gcEntryLogMetadataCacheEnabled"; protected static final String GC_ENTRYLOG_METADATA_CACHE_PATH = "gcEntryLogMetadataCachePath"; + protected static final String USE_TARGET_ENTRYLOG_SIZE_FOR_GC = "useTargetEntryLogSizeForGc"; // Scrub Parameters protected static final String LOCAL_SCRUB_PERIOD = "localScrubInterval"; protected static final String LOCAL_SCRUB_RATE_LIMIT = "localScrubRateLimit"; @@ -552,6 +553,15 @@ public class ServerConfiguration extends AbstractConfiguration<ServerConfigurati return this; } + public boolean isUseTargetEntryLogSizeForGc() { + return getBoolean(USE_TARGET_ENTRYLOG_SIZE_FOR_GC, false); + } + + public ServerConfiguration setUseTargetEntryLogSizeForGc(boolean 
useTargetEntryLogSizeForGc) { + this.setProperty(USE_TARGET_ENTRYLOG_SIZE_FOR_GC, useTargetEntryLogSizeForGc); + return this; + } + /** * Get whether local scrub is enabled. * diff --git a/conf/bk_server.conf b/conf/bk_server.conf index 2c09c2c2f9..409613c996 100755 --- a/conf/bk_server.conf +++ b/conf/bk_server.conf @@ -618,6 +618,15 @@ gcEntryLogMetadataCacheEnabled=false # name "entrylogIndexCache"] # gcEntryLogMetadataCachePath= +# When judging whether an entry log file needs to be compacted, we calculate the usage rate of the entry log file based +# on the actual size of the entry log file. However, if an entry log file is 1MB in size and 0.9MB of data is +# being used, this entry log file won't be compacted by the garbage collector due to the high usage ratio, +# which will result in many small entry log files. +# We introduced the parameter `useTargetEntryLogSizeForGc` to determine whether to calculate entry log file usage +# based on the configured target entry log file size, which is configured by `logSizeLimit`. +# Default: useTargetEntryLogSizeForGc is false. +# useTargetEntryLogSizeForGc=false + ############################################################################# ## Disk utilization #############################################################################
