This is an automated email from the ASF dual-hosted git repository.

chenhang pushed a commit to branch branch-4.14
in repository https://gitbox.apache.org/repos/asf/bookkeeper.git

commit 10740fd5b83af488a4f20b6f8fab5722838a69ba
Author: Matteo Merli <[email protected]>
AuthorDate: Mon May 22 12:21:46 2023 -0700

    Avoid compaction to trigger extra flushes DbLedgerStorage (#3959)
    
    * Avoid compaction to trigger extra flushes DbLedgerStorage
    
    * Expanded comment
    
    * Fixed test
    
    * Fixed direct io test
    
    (cherry picked from commit c924cfeb509c4e65e33a82ae88ad423017edb669)
---
 .../ldb/SingleDirectoryDbLedgerStorage.java        | 25 ++++++++++++++++++++--
 .../bookie/storage/ldb/DbLedgerStorageTest.java    |  2 ++
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java
 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java
index dd2027e47b..23013e0139 100644
--- 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java
+++ 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java
@@ -755,9 +755,30 @@ public class SingleDirectoryDbLedgerStorage implements 
CompactableLedgerStorage
 
     @Override
     public void updateEntriesLocations(Iterable<EntryLocation> locations) 
throws IOException {
-        // Trigger a flush to have all the entries being compacted in the db 
storage
-        flush();
+        // Before updating the DB with the new location for the compacted 
entries, we need to
+        // make sure that there is no ongoing flush() operation.
+        // If there were a flush, we could have the following situation, which 
is highly
+        // unlikely though possible:
+        // 1. Flush operation has written the write-cache content into 
entry-log files
+        // 2. The DB location index is not yet updated
+        // 3. Compaction is triggered and starts compacting some of the recent 
files
+        // 4. Compaction will write the "new location" into the DB
+        // 5. The pending flush() will overwrite the DB with the "old 
location", pointing
+        //    to a file that no longer exists
+        //
+        // To avoid this race condition, we need that all the entries that are 
potentially
+        // included in the compaction round to have all the indexes already 
flushed into
+        // the DB.
+        // The easiest lightweight way to achieve this is to wait for any 
pending
+        // flush operation to be completed before updating the index with the 
compacted
+        // entries, by blocking on the flushMutex.
+        flushMutex.lock();
+        flushMutex.unlock();
 
+        // We don't need to keep the flush mutex locked here while updating 
the DB.
+        // It's fine to have a concurrent flush operation at this point, 
because we
+        // know that none of the entries being flushed was included in the 
compaction
+        // round that we are dealing with.
         entryLocationIndex.updateLocations(locations);
     }
 
diff --git 
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java
 
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java
index f5d3cefe6c..a39d380e6d 100644
--- 
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java
+++ 
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java
@@ -222,6 +222,7 @@ public class DbLedgerStorageTest {
         entry3.writeBytes("entry-3".getBytes());
         storage.addEntry(entry3);
 
+
         // Simulate bookie compaction
         SingleDirectoryDbLedgerStorage singleDirStorage = ((DbLedgerStorage) 
storage).getLedgerStorageList().get(0);
         EntryLogger entryLogger = singleDirStorage.getEntryLogger();
@@ -232,6 +233,7 @@ public class DbLedgerStorageTest {
         newEntry3.writeBytes("new-entry-3".getBytes());
         long location = entryLogger.addEntry(4L, newEntry3, false);
 
+        storage.flush();
         List<EntryLocation> locations = Lists.newArrayList(new 
EntryLocation(4, 3, location));
         singleDirStorage.updateEntriesLocations(locations);
 

Reply via email to