This is an automated email from the ASF dual-hosted git repository.

yong pushed a commit to branch branch-4.15
in repository https://gitbox.apache.org/repos/asf/bookkeeper.git

commit 608f9f07b30bdbb20225d361010617624b65c6b0
Author: Hang Chen <[email protected]>
AuthorDate: Thu Mar 16 11:28:48 2023 +0800

    Add small files check in garbage collection (#3631)
    
    ### Motivation
    When we use `TransactionalEntryLogCompactor` to compact the entry log 
files, it generates a lot of small entry log files. The usage of those files 
is usually greater than 90%, so they cannot be compacted until their usage 
drops.
    
    
![image](https://user-images.githubusercontent.com/5436568/201135615-4d6072f5-e353-483d-9afb-48fad8134044.png)
    
    ### Changes
    We introduce an entry log file size check during compaction, controlled 
by the boolean setting `useTargetEntryLogSizeForGc`.
    When it is enabled and an entry log's usage is below 1.0, the usage is 
computed as `remainingSize / max(totalSize, logSizeLimit)` instead of being 
based on the file's actual size, so a small entry log file can be compacted 
even though its usage relative to its own size is greater than 90%. This 
feature is disabled by default (`useTargetEntryLogSizeForGc` defaults to 
`false`).
    
    (cherry picked from commit 2fad33bfcf24a72f7fdf103969ed4b0aa26778a2)
---
 .../org/apache/bookkeeper/bookie/GarbageCollectorThread.java  | 11 ++++++++---
 .../java/org/apache/bookkeeper/conf/ServerConfiguration.java  | 10 ++++++++++
 conf/bk_server.conf                                           |  9 +++++++++
 3 files changed, 27 insertions(+), 3 deletions(-)

diff --git 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java
 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java
index c88fa0bf4d..b1067aef36 100644
--- 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java
+++ 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java
@@ -557,15 +557,20 @@ public class GarbageCollectorThread extends SafeRunnable {
         MutableLong timeDiff = new MutableLong(0);
 
         entryLogMetaMap.forEach((entryLogId, meta) -> {
-            int bucketIndex = calculateUsageIndex(numBuckets, meta.getUsage());
+            double usage = meta.getUsage();
+            if (conf.isUseTargetEntryLogSizeForGc() && usage < 1.0d) {
+                usage = (double) meta.getRemainingSize() / 
Math.max(meta.getTotalSize(), conf.getEntryLogSizeLimit());
+            }
+            int bucketIndex = calculateUsageIndex(numBuckets, usage);
             entryLogUsageBuckets[bucketIndex]++;
 
             if (timeDiff.getValue() < maxTimeMillis) {
                 end.setValue(System.currentTimeMillis());
                 timeDiff.setValue(end.getValue() - start);
             }
-            if (meta.getUsage() >= threshold || (maxTimeMillis > 0 && 
timeDiff.getValue() >= maxTimeMillis)
-                    || !running) {
+            if ((usage >= threshold
+                || (maxTimeMillis > 0 && timeDiff.getValue() >= maxTimeMillis)
+                || !running)) {
                 // We allow the usage limit calculation to continue so that we 
get an accurate
                 // report of where the usage was prior to running compaction.
                 return;
diff --git 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java
 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java
index 6327599808..c0e33d563b 100644
--- 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java
+++ 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java
@@ -119,6 +119,7 @@ public class ServerConfiguration extends 
AbstractConfiguration<ServerConfigurati
     protected static final String VERIFY_METADATA_ON_GC = "verifyMetadataOnGC";
     protected static final String GC_ENTRYLOGMETADATA_CACHE_ENABLED = 
"gcEntryLogMetadataCacheEnabled";
     protected static final String GC_ENTRYLOG_METADATA_CACHE_PATH = 
"gcEntryLogMetadataCachePath";
+    protected static final String USE_TARGET_ENTRYLOG_SIZE_FOR_GC = 
"useTargetEntryLogSizeForGc";
     // Scrub Parameters
     protected static final String LOCAL_SCRUB_PERIOD = "localScrubInterval";
     protected static final String LOCAL_SCRUB_RATE_LIMIT = 
"localScrubRateLimit";
@@ -552,6 +553,15 @@ public class ServerConfiguration extends 
AbstractConfiguration<ServerConfigurati
         return this;
     }
 
+    public boolean isUseTargetEntryLogSizeForGc() {
+        return getBoolean(USE_TARGET_ENTRYLOG_SIZE_FOR_GC, false);
+    }
+
+    public ServerConfiguration setUseTargetEntryLogSizeForGc(boolean 
useTargetEntryLogSizeForGc) {
+        this.setProperty(USE_TARGET_ENTRYLOG_SIZE_FOR_GC, 
useTargetEntryLogSizeForGc);
+        return this;
+    }
+
     /**
      * Get whether local scrub is enabled.
      *
diff --git a/conf/bk_server.conf b/conf/bk_server.conf
index 2c09c2c2f9..409613c996 100755
--- a/conf/bk_server.conf
+++ b/conf/bk_server.conf
@@ -618,6 +618,15 @@ gcEntryLogMetadataCacheEnabled=false
 # name "entrylogIndexCache"]
 # gcEntryLogMetadataCachePath=
 
+# When judging whether an entry log file needs to be compacted, we calculate 
the usage rate of the entry log file based
+# on the actual size of the entry log file. However, if an entry log file is 
1MB in size and 0.9MB of data is
+# being used, this entry log file won't be compacted by the garbage collector 
due to the high usage ratio,
+# which will result in many small entry log files.
+# We introduced the parameter `useTargetEntryLogSizeForGc` to determine 
whether to calculate entry log file usage
+# based on the configured target entry log file size, which is configured by 
`logSizeLimit`.
+# Default: useTargetEntryLogSizeForGc is false.
+# useTargetEntryLogSizeForGc=false
+
 #############################################################################
 ## Disk utilization
 #############################################################################

Reply via email to