This is an automated email from the ASF dual-hosted git repository.

ayegorov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/bookkeeper.git


The following commit(s) were added to refs/heads/master by this push:
     new 50f5287  make rocksdb format version configurable
50f5287 is described below

commit 50f5287f3637c5fa01a4d146477087b217bdebd5
Author: Hang Chen <[email protected]>
AuthorDate: Tue Feb 15 03:00:06 2022 +0800

    make rocksdb format version configurable
    
    ### Motivation
    Fix #2823
    RocksDB supports several format versions, which use different data structures 
to implement key-value indexes and differ greatly in performance. 
https://rocksdb.org/blog/2019/03/08/format-version-4.html
    
    
https://github.com/facebook/rocksdb/blob/d52b520d5168de6be5f1494b2035b61ff0958c11/include/rocksdb/table.h#L368-L394
    
    ```C++
      // We currently have five versions:
      // 0 -- This version is currently written out by all RocksDB's versions by
      // default.  Can be read by really old RocksDB's. Doesn't support changing
      // checksum (default is CRC32).
      // 1 -- Can be read by RocksDB's versions since 3.0. Supports non-default
      // checksum, like xxHash. It is written by RocksDB when
      // BlockBasedTableOptions::checksum is something other than kCRC32c. 
(version
      // 0 is silently upconverted)
      // 2 -- Can be read by RocksDB's versions since 3.10. Changes the way we
      // encode compressed blocks with LZ4, BZip2 and Zlib compression. If you
      // don't plan to run RocksDB before version 3.10, you should probably use
      // this.
      // 3 -- Can be read by RocksDB's versions since 5.15. Changes the way we
      // encode the keys in index blocks. If you don't plan to run RocksDB 
before
      // version 5.15, you should probably use this.
      // This option only affects newly written tables. When reading existing
      // tables, the information about version is read from the footer.
      // 4 -- Can be read by RocksDB's versions since 5.16. Changes the way we
      // encode the values in index blocks. If you don't plan to run RocksDB 
before
      // version 5.16 and you are using index_block_restart_interval > 1, you 
should
      // probably use this as it would reduce the index size.
      // This option only affects newly written tables. When reading existing
      // tables, the information about version is read from the footer.
      // 5 -- Can be read by RocksDB's versions since 6.6.0. Full and 
partitioned
      // filters use a generally faster and more accurate Bloom filter
      // implementation, with a different schema.
      uint32_t format_version = 5;
    ```
    Different format versions require different RocksDB versions, and it is not 
possible to roll back once upgraded to a new format version.
    
    In our current RocksDB storage code, we hard-code the format_version to 2, 
which makes it hard to upgrade format_version to take advantage of the higher 
performance of newer RocksDB format versions.
    
    ### Changes
    
    1. Make the format_version configurable.
    
    
    Reviewers: Matteo Merli <[email protected]>, Enrico Olivelli 
<[email protected]>
    
    This closes #2824 from 
hangc0276/chenhang/make_rocksdb_format_version_configurable
---
 .../apache/bookkeeper/bookie/storage/ldb/KeyValueStorageRocksDB.java  | 4 +++-
 conf/bk_server.conf                                                   | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/KeyValueStorageRocksDB.java
 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/KeyValueStorageRocksDB.java
index bda8272..e6eb197 100644
--- 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/KeyValueStorageRocksDB.java
+++ 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/KeyValueStorageRocksDB.java
@@ -83,6 +83,7 @@ public class KeyValueStorageRocksDB implements 
KeyValueStorage {
     private static final String ROCKSDB_NUM_LEVELS = 
"dbStorage_rocksDB_numLevels";
     private static final String ROCKSDB_NUM_FILES_IN_LEVEL0 = 
"dbStorage_rocksDB_numFilesInLevel0";
     private static final String ROCKSDB_MAX_SIZE_IN_LEVEL1_MB = 
"dbStorage_rocksDB_maxSizeInLevel1MB";
+    private static final String ROCKSDB_FORMAT_VERSION = 
"dbStorage_rocksDB_format_version";
 
     public KeyValueStorageRocksDB(String basePath, String subPath, 
DbConfigType dbConfigType, ServerConfiguration conf)
             throws IOException {
@@ -122,6 +123,7 @@ public class KeyValueStorageRocksDB implements 
KeyValueStorage {
                 int blockSize = conf.getInt(ROCKSDB_BLOCK_SIZE, 64 * 1024);
                 int bloomFilterBitsPerKey = 
conf.getInt(ROCKSDB_BLOOM_FILTERS_BITS_PER_KEY, 10);
                 boolean lz4CompressionEnabled = 
conf.getBoolean(ROCKSDB_LZ4_COMPRESSION_ENABLED, true);
+                int formatVersion = conf.getInt(ROCKSDB_FORMAT_VERSION, 2);
 
                 if (lz4CompressionEnabled) {
                     
options.setCompressionType(CompressionType.LZ4_COMPRESSION);
@@ -144,7 +146,7 @@ public class KeyValueStorageRocksDB implements 
KeyValueStorage {
                 BlockBasedTableConfig tableOptions = new 
BlockBasedTableConfig();
                 tableOptions.setBlockSize(blockSize);
                 tableOptions.setBlockCache(cache);
-                tableOptions.setFormatVersion(2);
+                tableOptions.setFormatVersion(formatVersion);
                 tableOptions.setChecksumType(ChecksumType.kxxHash);
                 if (bloomFilterBitsPerKey > 0) {
                     tableOptions.setFilterPolicy(new 
BloomFilter(bloomFilterBitsPerKey, false));
diff --git a/conf/bk_server.conf b/conf/bk_server.conf
index 801976e..f83a46e 100755
--- a/conf/bk_server.conf
+++ b/conf/bk_server.conf
@@ -740,6 +740,7 @@ gcEntryLogMetadataCacheEnabled=false
 # dbStorage_rocksDB_numFilesInLevel0=4
 # dbStorage_rocksDB_maxSizeInLevel1MB=256
 # dbStorage_rocksDB_logPath=
+# dbStorage_rocksDB_format_version=2
 
 
 ############################################## Metadata Services 
##############################################

Reply via email to