This is an automated email from the ASF dual-hosted git repository.

ibessonov pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/ignite-3.git


The following commit(s) were added to refs/heads/main by this push:
     new c7c620b3c3 IGNITE-19700 Added several optimizations for meta-storage reads. (#2218)
c7c620b3c3 is described below

commit c7c620b3c3c80698de2ce0a6855b6495ca351b4b
Author: Ivan Bessonov <[email protected]>
AuthorDate: Tue Jun 20 13:51:05 2023 +0300

    IGNITE-19700 Added several optimizations for meta-storage reads. (#2218)
---
 .../server/persistence/RocksDbKeyValueStorage.java | 36 ++++++++++++++++------
 .../server/persistence/RocksStorageUtils.java      | 18 ++++++-----
 2 files changed, 36 insertions(+), 18 deletions(-)

diff --git a/modules/metastorage/src/main/java/org/apache/ignite/internal/metastorage/server/persistence/RocksDbKeyValueStorage.java b/modules/metastorage/src/main/java/org/apache/ignite/internal/metastorage/server/persistence/RocksDbKeyValueStorage.java
index a03de949bc..15ba8667af 100644
--- a/modules/metastorage/src/main/java/org/apache/ignite/internal/metastorage/server/persistence/RocksDbKeyValueStorage.java
+++ b/modules/metastorage/src/main/java/org/apache/ignite/internal/metastorage/server/persistence/RocksDbKeyValueStorage.java
@@ -39,6 +39,7 @@ import static 
org.apache.ignite.lang.ErrorGroups.MetaStorage.COMPACTION_ERR;
 import static org.apache.ignite.lang.ErrorGroups.MetaStorage.OP_EXECUTION_ERR;
 import static 
org.apache.ignite.lang.ErrorGroups.MetaStorage.RESTORING_STORAGE_ERR;
 import static 
org.apache.ignite.lang.ErrorGroups.MetaStorage.STARTING_STORAGE_ERR;
+import static org.rocksdb.util.SizeUnit.MB;
 
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Path;
@@ -86,10 +87,13 @@ import org.apache.ignite.internal.util.Cursor;
 import org.apache.ignite.internal.util.IgniteUtils;
 import org.jetbrains.annotations.Nullable;
 import org.jetbrains.annotations.TestOnly;
+import org.rocksdb.BlockBasedTableConfig;
+import org.rocksdb.BloomFilter;
 import org.rocksdb.ColumnFamilyDescriptor;
 import org.rocksdb.ColumnFamilyHandle;
 import org.rocksdb.ColumnFamilyOptions;
 import org.rocksdb.DBOptions;
+import org.rocksdb.LRUCache;
 import org.rocksdb.Options;
 import org.rocksdb.ReadOptions;
 import org.rocksdb.RocksDB;
@@ -239,20 +243,31 @@ public class RocksDbKeyValueStorage implements 
KeyValueStorage {
     }
 
     private static List<ColumnFamilyDescriptor> cfDescriptors() {
-        Options dataOptions = new Options().setCreateIfMissing(true)
+        Options baseOptions = new Options()
+                .setCreateIfMissing(true)
+                // Lowering the desired number of levels will, on average, lead to less lookups in files, making reads faster.
+                .setNumLevels(4)
+                // Protect ourselves from slower flushes during the peak write load.
+                .setMaxWriteBufferNumber(4)
+                .setTableFormatConfig(new BlockBasedTableConfig()
+                        // Speed-up key lookup in levels by adding a bloom filter and always caching it for level 0.
+                        // This improves the access time to keys from lower levels. 12 is chosen to fit into a 4kb memory chunk.
+                        // This proved to be big enough to positively affect the performance.
+                        .setPinL0FilterAndIndexBlocksInCache(true)
+                        .setFilterPolicy(new BloomFilter(12))
+                        // Often helps to avoid reading data from the storage 
device, making reads faster.
+                        .setBlockCache(new LRUCache(64 * MB))
+                );
+
+        ColumnFamilyOptions dataFamilyOptions = new 
ColumnFamilyOptions(baseOptions)
                 // The prefix is the revision of an entry, so prefix length is 
the size of a long
                 .useFixedLengthPrefixExtractor(Long.BYTES);
 
-        ColumnFamilyOptions dataFamilyOptions = new 
ColumnFamilyOptions(dataOptions);
+        ColumnFamilyOptions indexFamilyOptions = new 
ColumnFamilyOptions(baseOptions);
 
-        Options indexOptions = new Options().setCreateIfMissing(true);
-        ColumnFamilyOptions indexFamilyOptions = new 
ColumnFamilyOptions(indexOptions);
+        ColumnFamilyOptions tsToRevFamilyOptions = new 
ColumnFamilyOptions(baseOptions);
 
-        Options tsToRevOptions = new Options().setCreateIfMissing(true);
-        ColumnFamilyOptions tsToRevFamilyOptions = new 
ColumnFamilyOptions(tsToRevOptions);
-
-        Options revToTsOptions = new Options().setCreateIfMissing(true);
-        ColumnFamilyOptions revToTsFamilyOptions = new 
ColumnFamilyOptions(revToTsOptions);
+        ColumnFamilyOptions revToTsFamilyOptions = new 
ColumnFamilyOptions(baseOptions);
 
         return List.of(
                 new ColumnFamilyDescriptor(DATA.nameAsBytes(), 
dataFamilyOptions),
@@ -427,7 +442,8 @@ public class RocksDbKeyValueStorage implements 
KeyValueStorage {
      * @throws RocksDBException If failed.
      */
     private void fillAndWriteBatch(WriteBatch batch, long newRev, long 
newCntr, @Nullable HybridTimestamp ts) throws RocksDBException {
-        try (WriteOptions opts = new WriteOptions()) {
+        // Meta-storage recovery is based on the snapshot & external log. WAL is never used for recovery, and can be safely disabled.
+        try (WriteOptions opts = new WriteOptions().setDisableWAL(true)) {
             byte[] revisionBytes = longToBytes(newRev);
 
             data.put(batch, UPDATE_COUNTER_KEY, longToBytes(newCntr));
diff --git a/modules/metastorage/src/main/java/org/apache/ignite/internal/metastorage/server/persistence/RocksStorageUtils.java b/modules/metastorage/src/main/java/org/apache/ignite/internal/metastorage/server/persistence/RocksStorageUtils.java
index 1c4ed24fbe..1ce90f6d38 100644
--- a/modules/metastorage/src/main/java/org/apache/ignite/internal/metastorage/server/persistence/RocksStorageUtils.java
+++ b/modules/metastorage/src/main/java/org/apache/ignite/internal/metastorage/server/persistence/RocksStorageUtils.java
@@ -23,7 +23,6 @@ import java.lang.invoke.MethodHandles;
 import java.lang.invoke.VarHandle;
 import java.nio.ByteOrder;
 import java.util.Arrays;
-import java.util.stream.IntStream;
 import org.apache.ignite.internal.metastorage.server.Value;
 import org.jetbrains.annotations.Nullable;
 
@@ -185,9 +184,15 @@ class RocksStorageUtils {
         // Value must be divisible by a size of a long, because it's a list of 
longs
         assert (bytes.length % Long.BYTES) == 0;
 
-        return IntStream.range(0, bytes.length / Long.BYTES)
-                .mapToLong(i -> (long) LONG_ARRAY_HANDLE.get(bytes, i * 
Long.BYTES))
-                .toArray();
+        int size = bytes.length / Long.BYTES;
+
+        long[] res = new long[size];
+
+        for (int i = 0; i < size; i++) {
+            res[i] = (long) LONG_ARRAY_HANDLE.get(bytes, i * Long.BYTES);
+        }
+
+        return res;
     }
 
     /**
@@ -202,11 +207,8 @@ class RocksStorageUtils {
             return longToBytes(value);
         }
 
-        // Allocate a one long bigger array.
-        var result = new byte[bytes.length + Long.BYTES];
-
         // Copy the current value
-        System.arraycopy(bytes, 0, result, 0, bytes.length);
+        var result = Arrays.copyOf(bytes, bytes.length + Long.BYTES);
 
         LONG_ARRAY_HANDLE.set(result, bytes.length, value);
 

Reply via email to