This is an automated email from the ASF dual-hosted git repository.

mmerli pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/bookkeeper.git


The following commit(s) were added to refs/heads/master by this push:
     new c924cfeb50 Avoid compaction to trigger extra flushes DbLedgerStorage (#3959)
c924cfeb50 is described below

commit c924cfeb509c4e65e33a82ae88ad423017edb669
Author: Matteo Merli <mme...@apache.org>
AuthorDate: Mon May 22 12:21:46 2023 -0700

    Avoid compaction to trigger extra flushes DbLedgerStorage (#3959)
    
    * Avoid compaction to trigger extra flushes DbLedgerStorage
    
    * Expanded comment
    
    * Fixed test
    
    * Fixed direct io test
---
 .../ldb/SingleDirectoryDbLedgerStorage.java        | 25 ++++++++++++++++++++--
 .../bookie/storage/ldb/DbLedgerStorageTest.java    |  2 ++
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java
index f9553c5fe6..f54ed28e2b 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java
@@ -943,9 +943,30 @@ public class SingleDirectoryDbLedgerStorage implements CompactableLedgerStorage

     @Override
     public void updateEntriesLocations(Iterable<EntryLocation> locations) throws IOException {
-        // Trigger a flush to have all the entries being compacted in the db storage
-        flush();
+        // Before updating the DB with the new location for the compacted entries, we need to
+        // make sure that there is no ongoing flush() operation.
+        // If there were a flush, we could have the following situation, which is highly
+        // unlikely though possible:
+        // 1. Flush operation has written the write-cache content into entry-log files
+        // 2. The DB location index is not yet updated
+        // 3. Compaction is triggered and starts compacting some of the recent files
+        // 4. Compaction will write the "new location" into the DB
+        // 5. The pending flush() will overwrite the DB with the "old location", pointing
+        //    to a file that no longer exists
+        //
+        // To avoid this race condition, we need that all the entries that are potentially
+        // included in the compaction round to have all the indexes already flushed into
+        // the DB.
+        // The easiest lightweight way to achieve this is to wait for any pending
+        // flush operation to be completed before updating the index with the compacted
+        // entries, by blocking on the flushMutex.
+        flushMutex.lock();
+        flushMutex.unlock();

+        // We don't need to keep the flush mutex locked here while updating the DB.
+        // It's fine to have a concurrent flush operation at this point, because we
+        // know that none of the entries being flushed was included in the compaction
+        // round that we are dealing with.
         entryLocationIndex.updateLocations(locations);
     }
 
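For readers following the comment above: the lock()/unlock() pair works as a barrier. Assuming flushMutex is a lock held for the whole duration of flush() (steps 1 and 2 in the comment), once compaction acquires the mutex any in-flight flush has already finished updating the location index, and whatever is still in the write cache cannot belong to the compaction round. Below is a minimal, self-contained sketch of that pattern; the class and method bodies are illustrative only, not the actual SingleDirectoryDbLedgerStorage code.

    import java.util.concurrent.locks.ReentrantLock;

    // Illustrative sketch of the flush-barrier idea, not BookKeeper code.
    class FlushBarrierSketch {
        private final ReentrantLock flushMutex = new ReentrantLock();

        void flush() {
            flushMutex.lock();
            try {
                // 1. write the write-cache content into entry-log files
                // 2. update the DB location index with the pre-compaction locations
            } finally {
                flushMutex.unlock();
            }
        }

        void updateEntriesLocations(/* Iterable<EntryLocation> locations */) {
            // Barrier: lock() blocks until any in-flight flush() releases the mutex,
            // i.e. until it has finished updating the location index. We unlock
            // immediately because we only need to wait, not to exclude new flushes.
            flushMutex.lock();
            flushMutex.unlock();

            // A flush starting after this point only carries entries that were not
            // part of this compaction round, so it cannot clobber the locations
            // written here, e.g. entryLocationIndex.updateLocations(locations).
        }
    }

Compared with the old code, waiting on the mutex is cheaper than forcing a full extra flush just to make the index consistent, which is the point of the change.
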
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java
index 4efdf06edb..2a7e8e2869 100644
--- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java
+++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java
@@ -225,6 +225,7 @@ public class DbLedgerStorageTest {
         entry3.writeBytes("entry-3".getBytes());
         storage.addEntry(entry3);
 
+
         // Simulate bookie compaction
         SingleDirectoryDbLedgerStorage singleDirStorage = ((DbLedgerStorage) storage).getLedgerStorageList().get(0);
         EntryLogger entryLogger = singleDirStorage.getEntryLogger();
@@ -236,6 +237,7 @@ public class DbLedgerStorageTest {
         long location = entryLogger.addEntry(4L, newEntry3);
         newEntry3.resetReaderIndex();
 
+        storage.flush();
         List<EntryLocation> locations = Lists.newArrayList(new EntryLocation(4, 3, location));
         singleDirStorage.updateEntriesLocations(locations);
 
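The test change follows from the main change: since updateEntriesLocations() no longer triggers a flush internally, the test now calls storage.flush() itself, so the previously added entries are persisted and indexed before the simulated compaction rewrites entry 3's location (the implicit flush used to provide that ordering). Condensed from the hunks above (assertions and setup omitted, variable names as in the test):

    // Simulated compaction: rewrite the entry into the entry log...
    long location = entryLogger.addEntry(4L, newEntry3);
    newEntry3.resetReaderIndex();

    // ...flush explicitly, since updateEntriesLocations() no longer does it...
    storage.flush();

    // ...then point the location index at the new copy.
    List<EntryLocation> locations = Lists.newArrayList(new EntryLocation(4, 3, location));
    singleDirStorage.updateEntriesLocations(locations);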
