This is an automated email from the ASF dual-hosted git repository. mmerli pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/bookkeeper.git
The following commit(s) were added to refs/heads/master by this push: new c924cfeb50 Avoid compaction to trigger extra flushes DbLedgerStorage (#3959) c924cfeb50 is described below commit c924cfeb509c4e65e33a82ae88ad423017edb669 Author: Matteo Merli <mme...@apache.org> AuthorDate: Mon May 22 12:21:46 2023 -0700 Avoid compaction to trigger extra flushes DbLedgerStorage (#3959) * Avoid compaction to trigger extra flushes DbLedgerStorage * Expanded comment * Fixed test * Fixed direct io test --- .../ldb/SingleDirectoryDbLedgerStorage.java | 25 ++++++++++++++++++++-- .../bookie/storage/ldb/DbLedgerStorageTest.java | 2 ++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java index f9553c5fe6..f54ed28e2b 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java @@ -943,9 +943,30 @@ public class SingleDirectoryDbLedgerStorage implements CompactableLedgerStorage @Override public void updateEntriesLocations(Iterable<EntryLocation> locations) throws IOException { - // Trigger a flush to have all the entries being compacted in the db storage - flush(); + // Before updating the DB with the new location for the compacted entries, we need to + // make sure that there is no ongoing flush() operation. + // If there were a flush, we could have the following situation, which is highly + // unlikely though possible: + // 1. Flush operation has written the write-cache content into entry-log files + // 2. The DB location index is not yet updated + // 3. Compaction is triggered and starts compacting some of the recent files + // 4. Compaction will write the "new location" into the DB + // 5. The pending flush() will overwrite the DB with the "old location", pointing + // to a file that no longer exists + // + // To avoid this race condition, we need that all the entries that are potentially + // included in the compaction round to have all the indexes already flushed into + // the DB. + // The easiest lightweight way to achieve this is to wait for any pending + // flush operation to be completed before updating the index with the compacted + // entries, by blocking on the flushMutex. + flushMutex.lock(); + flushMutex.unlock(); + // We don't need to keep the flush mutex locked here while updating the DB. + // It's fine to have a concurrent flush operation at this point, because we + // know that none of the entries being flushed was included in the compaction + // round that we are dealing with. entryLocationIndex.updateLocations(locations); } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java index 4efdf06edb..2a7e8e2869 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java @@ -225,6 +225,7 @@ public class DbLedgerStorageTest { entry3.writeBytes("entry-3".getBytes()); storage.addEntry(entry3); + // Simulate bookie compaction SingleDirectoryDbLedgerStorage singleDirStorage = ((DbLedgerStorage) storage).getLedgerStorageList().get(0); EntryLogger entryLogger = singleDirStorage.getEntryLogger(); @@ -236,6 +237,7 @@ public class DbLedgerStorageTest { long location = entryLogger.addEntry(4L, newEntry3); newEntry3.resetReaderIndex(); + storage.flush(); List<EntryLocation> locations = Lists.newArrayList(new EntryLocation(4, 3, location)); singleDirStorage.updateEntriesLocations(locations);