This is an automated email from the ASF dual-hosted git repository. yong pushed a commit to branch branch-4.15 in repository https://gitbox.apache.org/repos/asf/bookkeeper.git
commit d94d1cc062a0b0d9ee4a8dc2782f6dfca7815533 Author: Matteo Merli <[email protected]> AuthorDate: Mon May 22 12:21:46 2023 -0700 Avoid compaction to trigger extra flushes DbLedgerStorage (#3959) * Avoid compaction to trigger extra flushes DbLedgerStorage * Expanded comment * Fixed test * Fixed direct io test (cherry picked from commit c924cfeb509c4e65e33a82ae88ad423017edb669) --- .../ldb/SingleDirectoryDbLedgerStorage.java | 25 ++++++++++++++++++++-- .../bookie/storage/ldb/DbLedgerStorageTest.java | 2 ++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java index a221283691..d1f186d35a 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java @@ -863,9 +863,30 @@ public class SingleDirectoryDbLedgerStorage implements CompactableLedgerStorage @Override public void updateEntriesLocations(Iterable<EntryLocation> locations) throws IOException { - // Trigger a flush to have all the entries being compacted in the db storage - flush(); + // Before updating the DB with the new location for the compacted entries, we need to + // make sure that there is no ongoing flush() operation. + // If there were a flush, we could have the following situation, which is highly + // unlikely though possible: + // 1. Flush operation has written the write-cache content into entry-log files + // 2. The DB location index is not yet updated + // 3. Compaction is triggered and starts compacting some of the recent files + // 4. Compaction will write the "new location" into the DB + // 5. 
The pending flush() will overwrite the DB with the "old location", pointing + // to a file that no longer exists + // + // To avoid this race condition, we need all the entries that are potentially + // included in the compaction round to have their indexes already flushed into + // the DB. + // The easiest lightweight way to achieve this is to wait for any pending + // flush operation to be completed before updating the index with the compacted + // entries, by blocking on the flushMutex. + flushMutex.lock(); + flushMutex.unlock(); + // We don't need to keep the flush mutex locked here while updating the DB. + // It's fine to have a concurrent flush operation at this point, because we + // know that none of the entries being flushed was included in the compaction + // round that we are dealing with. entryLocationIndex.updateLocations(locations); } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java index e3fabd55b4..bb2072b8ff 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java @@ -224,6 +224,7 @@ public class DbLedgerStorageTest { entry3.writeBytes("entry-3".getBytes()); storage.addEntry(entry3); + // Simulate bookie compaction SingleDirectoryDbLedgerStorage singleDirStorage = ((DbLedgerStorage) storage).getLedgerStorageList().get(0); EntryLogger entryLogger = singleDirStorage.getEntryLogger(); @@ -234,6 +235,7 @@ public class DbLedgerStorageTest { newEntry3.writeBytes("new-entry-3".getBytes()); long location = entryLogger.addEntry(4L, newEntry3, false); + storage.flush(); List<EntryLocation> locations = Lists.newArrayList(new EntryLocation(4, 3, location)); singleDirStorage.updateEntriesLocations(locations);
