This is an automated email from the ASF dual-hosted git repository.
ayegorov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/bookkeeper.git
The following commit(s) were added to refs/heads/master by this push:
new 50f5287 make rocksdb format version configurable
50f5287 is described below
commit 50f5287f3637c5fa01a4d146477087b217bdebd5
Author: Hang Chen <[email protected]>
AuthorDate: Tue Feb 15 03:00:06 2022 +0800
make rocksdb format version configurable
### Motivation
Fix #2823
RocksDB supports several format versions, which use different data structures
to implement key-value indexes and differ greatly in performance.
https://rocksdb.org/blog/2019/03/08/format-version-4.html
https://github.com/facebook/rocksdb/blob/d52b520d5168de6be5f1494b2035b61ff0958c11/include/rocksdb/table.h#L368-L394
```C++
// We currently have five versions:
// 0 -- This version is currently written out by all RocksDB's versions by
// default. Can be read by really old RocksDB's. Doesn't support changing
// checksum (default is CRC32).
// 1 -- Can be read by RocksDB's versions since 3.0. Supports non-default
// checksum, like xxHash. It is written by RocksDB when
// BlockBasedTableOptions::checksum is something other than kCRC32c. (version
// 0 is silently upconverted)
// 2 -- Can be read by RocksDB's versions since 3.10. Changes the way we
// encode compressed blocks with LZ4, BZip2 and Zlib compression. If you
// don't plan to run RocksDB before version 3.10, you should probably use
// this.
// 3 -- Can be read by RocksDB's versions since 5.15. Changes the way we
// encode the keys in index blocks. If you don't plan to run RocksDB before
// version 5.15, you should probably use this.
// This option only affects newly written tables. When reading existing
// tables, the information about version is read from the footer.
// 4 -- Can be read by RocksDB's versions since 5.16. Changes the way we
// encode the values in index blocks. If you don't plan to run RocksDB before
// version 5.16 and you are using index_block_restart_interval > 1, you should
// probably use this as it would reduce the index size.
// This option only affects newly written tables. When reading existing
// tables, the information about version is read from the footer.
// 5 -- Can be read by RocksDB's versions since 6.6.0. Full and partitioned
// filters use a generally faster and more accurate Bloom filter
// implementation, with a different schema.
uint32_t format_version = 5;
```
Different format versions require different RocksDB versions, and it is not
possible to roll back once upgraded to a new format version.
In our current RocksDB storage code, we hard-code the format_version to 2,
which makes it hard to upgrade format_version to take advantage of the higher
performance of newer RocksDB versions.
### Changes
1. Make the format_version configurable.
Reviewers: Matteo Merli <[email protected]>, Enrico Olivelli
<[email protected]>
This closes #2824 from
hangc0276/chenhang/make_rocksdb_format_version_configurable
---
.../apache/bookkeeper/bookie/storage/ldb/KeyValueStorageRocksDB.java | 4 +++-
conf/bk_server.conf | 1 +
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/KeyValueStorageRocksDB.java
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/KeyValueStorageRocksDB.java
index bda8272..e6eb197 100644
---
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/KeyValueStorageRocksDB.java
+++
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/KeyValueStorageRocksDB.java
@@ -83,6 +83,7 @@ public class KeyValueStorageRocksDB implements
KeyValueStorage {
private static final String ROCKSDB_NUM_LEVELS =
"dbStorage_rocksDB_numLevels";
private static final String ROCKSDB_NUM_FILES_IN_LEVEL0 =
"dbStorage_rocksDB_numFilesInLevel0";
private static final String ROCKSDB_MAX_SIZE_IN_LEVEL1_MB =
"dbStorage_rocksDB_maxSizeInLevel1MB";
+ private static final String ROCKSDB_FORMAT_VERSION =
"dbStorage_rocksDB_format_version";
public KeyValueStorageRocksDB(String basePath, String subPath,
DbConfigType dbConfigType, ServerConfiguration conf)
throws IOException {
@@ -122,6 +123,7 @@ public class KeyValueStorageRocksDB implements
KeyValueStorage {
int blockSize = conf.getInt(ROCKSDB_BLOCK_SIZE, 64 * 1024);
int bloomFilterBitsPerKey =
conf.getInt(ROCKSDB_BLOOM_FILTERS_BITS_PER_KEY, 10);
boolean lz4CompressionEnabled =
conf.getBoolean(ROCKSDB_LZ4_COMPRESSION_ENABLED, true);
+ int formatVersion = conf.getInt(ROCKSDB_FORMAT_VERSION, 2);
if (lz4CompressionEnabled) {
options.setCompressionType(CompressionType.LZ4_COMPRESSION);
@@ -144,7 +146,7 @@ public class KeyValueStorageRocksDB implements
KeyValueStorage {
BlockBasedTableConfig tableOptions = new
BlockBasedTableConfig();
tableOptions.setBlockSize(blockSize);
tableOptions.setBlockCache(cache);
- tableOptions.setFormatVersion(2);
+ tableOptions.setFormatVersion(formatVersion);
tableOptions.setChecksumType(ChecksumType.kxxHash);
if (bloomFilterBitsPerKey > 0) {
tableOptions.setFilterPolicy(new
BloomFilter(bloomFilterBitsPerKey, false));
diff --git a/conf/bk_server.conf b/conf/bk_server.conf
index 801976e..f83a46e 100755
--- a/conf/bk_server.conf
+++ b/conf/bk_server.conf
@@ -740,6 +740,7 @@ gcEntryLogMetadataCacheEnabled=false
# dbStorage_rocksDB_numFilesInLevel0=4
# dbStorage_rocksDB_maxSizeInLevel1MB=256
# dbStorage_rocksDB_logPath=
+# dbStorage_rocksDB_format_version=2
############################################## Metadata Services
##############################################