This is an automated email from the ASF dual-hosted git repository.
chenhang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/bookkeeper.git
The following commit(s) were added to refs/heads/master by this push:
new 696919cccb bring back deleteRange for RocksDB to improve location
delete performance (#3653)
696919cccb is described below
commit 696919cccb2a4d030dab8e7d91e4b72f9a69d302
Author: Hang Chen <[email protected]>
AuthorDate: Fri Nov 18 16:16:22 2022 +0800
bring back deleteRange for RocksDB to improve location delete performance
(#3653)
### Motivation
The entry log location index deletion is deleted in batches one by one
currently, and it will have low performance. Refer to:
https://github.com/apache/bookkeeper/pull/3646
Matteo has introduced deleteRange API a few years ago, but rollback due to
RocksDB delete ranges bug. https://github.com/apache/bookkeeper/pull/1620. The
RocksDB bug has been addressed since 5.18.0
https://github.com/facebook/rocksdb/blob/main/HISTORY.md#5180-2018-11-30. We
can bring the `deleteRange` API back to improve the entry log location deletion
performance.
### Changes
Bring `deleteRange` API back for entry log location deletion.
---
.../bookie/storage/ldb/EntryLocationIndex.java | 72 +++-------------------
.../bookkeeper/conf/ServerConfiguration.java | 22 -------
.../bookie/storage/ldb/EntryLocationIndexTest.java | 64 +++++++++++++++++++
conf/bk_server.conf | 4 --
4 files changed, 73 insertions(+), 89 deletions(-)
diff --git
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/EntryLocationIndex.java
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/EntryLocationIndex.java
index 0fd48291f7..067d08bbf4 100644
---
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/EntryLocationIndex.java
+++
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/EntryLocationIndex.java
@@ -47,14 +47,11 @@ public class EntryLocationIndex implements Closeable {
private final KeyValueStorage locationsDb;
private final ConcurrentLongHashSet deletedLedgers =
ConcurrentLongHashSet.newBuilder().build();
- private final int deleteEntriesBatchSize;
-
private final EntryLocationIndexStats stats;
public EntryLocationIndex(ServerConfiguration conf, KeyValueStorageFactory
storageFactory, String basePath,
StatsLogger stats) throws IOException {
locationsDb = storageFactory.newKeyValueStorage(basePath, "locations",
DbConfigType.EntryLocation, conf);
- deleteEntriesBatchSize = conf.getRocksDBDeleteEntriesBatchSize();
this.stats = new EntryLocationIndexStats(
stats,
@@ -195,7 +192,6 @@ public class EntryLocationIndex implements Closeable {
public void removeOffsetFromDeletedLedgers() throws IOException {
LongPairWrapper firstKeyWrapper = LongPairWrapper.get(-1, -1);
LongPairWrapper lastKeyWrapper = LongPairWrapper.get(-1, -1);
- LongPairWrapper keyToDelete = LongPairWrapper.get(-1, -1);
Set<Long> ledgersToDelete = deletedLedgers.items();
@@ -205,12 +201,8 @@ public class EntryLocationIndex implements Closeable {
log.info("Deleting indexes for ledgers: {}", ledgersToDelete);
long startTime = System.nanoTime();
- long deletedEntries = 0;
- long deletedEntriesInBatch = 0;
-
- Batch batch = locationsDb.newBatch();
- try {
+ try (Batch batch = locationsDb.newBatch()) {
for (long ledgerId : ledgersToDelete) {
if (log.isDebugEnabled()) {
log.debug("Deleting indexes from ledger {}", ledgerId);
@@ -219,66 +211,20 @@ public class EntryLocationIndex implements Closeable {
firstKeyWrapper.set(ledgerId, 0);
lastKeyWrapper.set(ledgerId, Long.MAX_VALUE);
- Entry<byte[], byte[]> firstKeyRes =
locationsDb.getCeil(firstKeyWrapper.array);
- if (firstKeyRes == null ||
ArrayUtil.getLong(firstKeyRes.getKey(), 0) != ledgerId) {
- // No entries found for ledger
- if (log.isDebugEnabled()) {
- log.debug("No entries found for ledger {}", ledgerId);
- }
- continue;
- }
-
- long firstEntryId = ArrayUtil.getLong(firstKeyRes.getKey(), 8);
- long lastEntryId;
- try {
- lastEntryId = getLastEntryInLedgerInternal(ledgerId);
- } catch (Bookie.NoEntryException nee) {
- if (log.isDebugEnabled()) {
- log.debug("No last entry id found for ledger {}",
ledgerId);
- }
- continue;
- }
- if (log.isDebugEnabled()) {
- log.debug("Deleting index for ledger {} entries ({} ->
{})",
- ledgerId, firstEntryId, lastEntryId);
- }
-
- // Iterate over all the keys and remove each of them
- for (long entryId = firstEntryId; entryId <= lastEntryId;
entryId++) {
- keyToDelete.set(ledgerId, entryId);
- if (log.isDebugEnabled()) {
- log.debug("Deleting index for ({}, {})",
keyToDelete.getFirst(), keyToDelete.getSecond());
- }
- batch.remove(keyToDelete.array);
- ++deletedEntriesInBatch;
- ++deletedEntries;
- }
+ batch.deleteRange(firstKeyWrapper.array, lastKeyWrapper.array);
+ }
- if (deletedEntriesInBatch > deleteEntriesBatchSize) {
- batch.flush();
- batch.clear();
- deletedEntriesInBatch = 0;
- }
+ batch.flush();
+ for (long ledgerId : ledgersToDelete) {
+ deletedLedgers.remove(ledgerId);
}
} finally {
- try {
- batch.flush();
- batch.clear();
- } finally {
- firstKeyWrapper.recycle();
- lastKeyWrapper.recycle();
- keyToDelete.recycle();
- batch.close();
- }
+ firstKeyWrapper.recycle();
+ lastKeyWrapper.recycle();
}
- log.info("Deleted indexes for {} entries from {} ledgers in {}
seconds", deletedEntries, ledgersToDelete.size(),
+ log.info("Deleted indexes from {} ledgers in {} seconds",
ledgersToDelete.size(),
TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime) /
1000.0);
-
- // Removed from pending set
- for (long ledgerId : ledgersToDelete) {
- deletedLedgers.remove(ledgerId);
- }
}
private static final Logger log =
LoggerFactory.getLogger(EntryLocationIndex.class);
diff --git
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java
index 5c2ebb7146..05a4dec907 100644
---
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java
+++
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java
@@ -337,8 +337,6 @@ public class ServerConfiguration extends
AbstractConfiguration<ServerConfigurati
// Used for location index, lots of writes and much bigger dataset
protected static final String LEDGER_METADATA_ROCKSDB_CONF =
"ledgerMetadataRocksdbConf";
- protected static final String ROCKSDB_DELETE_ENTRIES_BATCH_SIZE =
"rocksDBDeleteEntriesBatchSize";
-
/**
* Construct a default configuration object.
*/
@@ -4048,24 +4046,4 @@ public class ServerConfiguration extends
AbstractConfiguration<ServerConfigurati
this.setProperty(LEDGER_METADATA_ROCKSDB_CONF,
ledgerMetadataRocksdbConf);
return this;
}
-
- /**
- * Get entry log location index delete entries batch size from RocksDB.
- *
- * @return Int rocksDB delete entries batch size configured in Service
configuration.
- */
- public int getRocksDBDeleteEntriesBatchSize() {
- return getInt(ROCKSDB_DELETE_ENTRIES_BATCH_SIZE, 100000);
- }
-
- /**
- * Set entry log location index delete entries batch size from RocksDB.
- *
- * @param rocksDBDeleteEntriesBatchSize
- * @return
- */
- public ServerConfiguration setRocksDBDeleteEntriesBatchSize(int
rocksDBDeleteEntriesBatchSize) {
- this.setProperty(ROCKSDB_DELETE_ENTRIES_BATCH_SIZE,
rocksDBDeleteEntriesBatchSize);
- return this;
- }
}
diff --git
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/EntryLocationIndexTest.java
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/EntryLocationIndexTest.java
index 9c1fbd320f..1aef54f4a4 100644
---
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/EntryLocationIndexTest.java
+++
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/EntryLocationIndexTest.java
@@ -79,6 +79,70 @@ public class EntryLocationIndexTest {
idx.close();
}
+ @Test
+ public void deleteBatchLedgersTest() throws Exception {
+ File tmpDir = File.createTempFile("bkTest", ".dir");
+ tmpDir.delete();
+ tmpDir.mkdir();
+ tmpDir.deleteOnExit();
+
+ EntryLocationIndex idx = new EntryLocationIndex(serverConfiguration,
KeyValueStorageRocksDB.factory,
+ tmpDir.getAbsolutePath(), NullStatsLogger.INSTANCE);
+
+ int numLedgers = 1000;
+ int numEntriesPerLedger = 100;
+
+ int location = 0;
+ KeyValueStorage.Batch batch = idx.newBatch();
+ for (int entryId = 0; entryId < numEntriesPerLedger; ++entryId) {
+ for (int ledgerId = 0; ledgerId < numLedgers; ++ledgerId) {
+ idx.addLocation(batch, ledgerId, entryId, location);
+ location++;
+ }
+ }
+ batch.flush();
+ batch.close();
+
+
+ int expectedLocation = 0;
+ for (int entryId = 0; entryId < numEntriesPerLedger; ++entryId) {
+ for (int ledgerId = 0; ledgerId < numLedgers; ++ledgerId) {
+ assertEquals(expectedLocation, idx.getLocation(ledgerId,
entryId));
+ expectedLocation++;
+ }
+ }
+
+ for (int ledgerId = 0; ledgerId < numLedgers; ++ledgerId) {
+ if (ledgerId % 2 == 0) {
+ idx.delete(ledgerId);
+ }
+ }
+
+ expectedLocation = 0;
+ for (int entryId = 0; entryId < numEntriesPerLedger; ++entryId) {
+ for (int ledgerId = 0; ledgerId < numLedgers; ++ledgerId) {
+ assertEquals(expectedLocation, idx.getLocation(ledgerId,
entryId));
+ expectedLocation++;
+ }
+ }
+
+ idx.removeOffsetFromDeletedLedgers();
+
+ expectedLocation = 0;
+ for (int entryId = 0; entryId < numEntriesPerLedger; ++entryId) {
+ for (int ledgerId = 0; ledgerId < numLedgers; ++ledgerId) {
+ if (ledgerId % 2 == 0) {
+ assertEquals(0, idx.getLocation(ledgerId, entryId));
+ } else {
+ assertEquals(expectedLocation, idx.getLocation(ledgerId,
entryId));
+ }
+ expectedLocation++;
+ }
+ }
+
+ idx.close();
+ }
+
// this tests if a ledger is added after it has been deleted
@Test
public void addLedgerAfterDeleteTest() throws Exception {
diff --git a/conf/bk_server.conf b/conf/bk_server.conf
index 94cfe72736..c1696ca3be 100755
--- a/conf/bk_server.conf
+++ b/conf/bk_server.conf
@@ -756,10 +756,6 @@ gcEntryLogMetadataCacheEnabled=false
# Default is to use 10% / numberOfLedgers of the direct memory size
# dbStorage_rocksDB_blockCacheSize=
-# entry log location index delete entries batch size from RocksDB.
-# Default is 100000
-# rocksDBDeleteEntriesBatchSize=100000
-
# Other RocksDB specific tunables
# dbStorage_rocksDB_writeBufferSizeMB=64
# dbStorage_rocksDB_sstSizeInMB=64