This is an automated email from the ASF dual-hosted git repository.
yunhong pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fluss.git
The following commit(s) were added to refs/heads/main by this push:
new e4f694d75 Add RocksDB block cache configuration options for index and filter blocks (#2394)
e4f694d75 is described below
commit e4f694d753228a15365448a55ee7d7b86fb40673
Author: Yang Wang <[email protected]>
AuthorDate: Sun Jan 18 13:28:33 2026 +0800
Add RocksDB block cache configuration options for index and filter blocks (#2394)
This commit adds configuration options to control how RocksDB handles
index and filter blocks in the block cache:
- kv.rocksdb.block.cache-index-and-filter-blocks: Store index/filter
blocks in block cache to limit memory usage (default: false)
- kv.rocksdb.block.cache-index-and-filter-blocks-with-high-priority:
Give index/filter blocks high priority in cache (default: false)
- kv.rocksdb.block.pin-l0-filter-and-index-blocks-in-cache: Pin L0
index/filter blocks to avoid eviction (default: false)
- kv.rocksdb.block.pin-top-level-index-and-filter: Pin top-level
partitioned index/filter blocks (default: false)
These options help users control RocksDB memory usage by allowing
index and filter blocks to be managed within the block cache,
preventing unbounded memory growth.
---
.../org/apache/fluss/config/ConfigOptions.java | 39 ++++++++++++++++++++++
.../kv/rocksdb/RocksDBResourceContainer.java | 11 ++++++
.../kv/rocksdb/RocksDBResourceContainerTest.java | 39 ++++++++++++++++++++++
website/docs/maintenance/configuration.md | 4 +++
4 files changed, 93 insertions(+)
diff --git a/fluss-common/src/main/java/org/apache/fluss/config/ConfigOptions.java b/fluss-common/src/main/java/org/apache/fluss/config/ConfigOptions.java
index d3f7cd2ff..7e04c3a1c 100644
--- a/fluss-common/src/main/java/org/apache/fluss/config/ConfigOptions.java
+++ b/fluss-common/src/main/java/org/apache/fluss/config/ConfigOptions.java
@@ -1739,6 +1739,45 @@ public class ConfigOptions {
"If true, RocksDB will use block-based filter
instead of full filter, this only take effect when bloom filter is used. "
+ "The default value is `false`.");
+ public static final ConfigOption<Boolean> KV_CACHE_INDEX_AND_FILTER_BLOCKS
=
+ key("kv.rocksdb.block.cache-index-and-filter-blocks")
+ .booleanType()
+ .defaultValue(false)
+ .withDescription(
+ "If true, index and filter blocks will be stored
in block cache, "
+ + "together with all other data blocks.
This helps to limit memory usage "
+ + "so that the total memory used by
RocksDB is bounded by block cache size. "
+ + "The default value is `false`.");
+
+ public static final ConfigOption<Boolean>
KV_CACHE_INDEX_AND_FILTER_BLOCKS_WITH_HIGH_PRIORITY =
+
key("kv.rocksdb.block.cache-index-and-filter-blocks-with-high-priority")
+ .booleanType()
+ .defaultValue(false)
+ .withDescription(
+ "If true and cache_index_and_filter_blocks is
enabled, "
+ + "index and filter blocks will be stored
with high priority in block cache, "
+ + "making them less likely to be evicted
than data blocks. "
+ + "The default value is `false`.");
+
+ public static final ConfigOption<Boolean>
KV_PIN_L0_FILTER_AND_INDEX_BLOCKS_IN_CACHE =
+ key("kv.rocksdb.block.pin-l0-filter-and-index-blocks-in-cache")
+ .booleanType()
+ .defaultValue(false)
+ .withDescription(
+ "If true and cache_index_and_filter_blocks is
enabled, "
+ + "L0 index and filter blocks will be
pinned in block cache and will not be evicted. "
+ + "This helps avoid performance
degradation due to cache misses on L0 index/filter blocks. "
+ + "The default value is `false`.");
+
+ public static final ConfigOption<Boolean>
KV_PIN_TOP_LEVEL_INDEX_AND_FILTER =
+ key("kv.rocksdb.block.pin-top-level-index-and-filter")
+ .booleanType()
+ .defaultValue(false)
+ .withDescription(
+ "If true, the top-level index of partitioned
index/filter blocks will be pinned "
+ + "in block cache and will not be evicted.
"
+ + "The default value is `false`.");
+
// ------------------------------------------------------------------------
// ConfigOptions for Kv recovering
// ------------------------------------------------------------------------
diff --git a/fluss-server/src/main/java/org/apache/fluss/server/kv/rocksdb/RocksDBResourceContainer.java b/fluss-server/src/main/java/org/apache/fluss/server/kv/rocksdb/RocksDBResourceContainer.java
index a07ea1a91..6fb885b82 100644
--- a/fluss-server/src/main/java/org/apache/fluss/server/kv/rocksdb/RocksDBResourceContainer.java
+++ b/fluss-server/src/main/java/org/apache/fluss/server/kv/rocksdb/RocksDBResourceContainer.java
@@ -308,6 +308,17 @@ public class RocksDBResourceContainer implements AutoCloseable {
handlesToClose.add(blockCache);
blockBasedTableConfig.setBlockCache(blockCache);
+ // Configure index and filter blocks caching
+ blockBasedTableConfig.setCacheIndexAndFilterBlocks(
+
internalGetOption(ConfigOptions.KV_CACHE_INDEX_AND_FILTER_BLOCKS));
+ blockBasedTableConfig.setCacheIndexAndFilterBlocksWithHighPriority(
+ internalGetOption(
+
ConfigOptions.KV_CACHE_INDEX_AND_FILTER_BLOCKS_WITH_HIGH_PRIORITY));
+ blockBasedTableConfig.setPinL0FilterAndIndexBlocksInCache(
+
internalGetOption(ConfigOptions.KV_PIN_L0_FILTER_AND_INDEX_BLOCKS_IN_CACHE));
+ blockBasedTableConfig.setPinTopLevelIndexAndFilter(
+
internalGetOption(ConfigOptions.KV_PIN_TOP_LEVEL_INDEX_AND_FILTER));
+
if (internalGetOption(ConfigOptions.KV_USE_BLOOM_FILTER)) {
final double bitsPerKey =
internalGetOption(ConfigOptions.KV_BLOOM_FILTER_BITS_PER_KEY);
final boolean blockBasedMode =
diff --git a/fluss-server/src/test/java/org/apache/fluss/server/kv/rocksdb/RocksDBResourceContainerTest.java b/fluss-server/src/test/java/org/apache/fluss/server/kv/rocksdb/RocksDBResourceContainerTest.java
index 676a17a6b..761cf0519 100644
--- a/fluss-server/src/test/java/org/apache/fluss/server/kv/rocksdb/RocksDBResourceContainerTest.java
+++ b/fluss-server/src/test/java/org/apache/fluss/server/kv/rocksdb/RocksDBResourceContainerTest.java
@@ -194,4 +194,43 @@ class RocksDBResourceContainerTest {
assertThat(tableConfig.filterPolicy() instanceof
BloomFilter).isTrue();
}
}
+
+ @Test
+ void testCacheIndexAndFilterBlocksConfig() throws Exception {
+ // Test with default values (all false, following RocksDB defaults)
+ Configuration defaultConfig = new Configuration();
+ try (RocksDBResourceContainer container =
+ new RocksDBResourceContainer(defaultConfig, null)) {
+ ColumnFamilyOptions columnOptions = container.getColumnOptions();
+ BlockBasedTableConfig tableConfig =
+ (BlockBasedTableConfig) columnOptions.tableFormatConfig();
+
+ // All default values should be false (RocksDB defaults)
+ assertThat(tableConfig.cacheIndexAndFilterBlocks()).isFalse();
+
assertThat(tableConfig.cacheIndexAndFilterBlocksWithHighPriority()).isFalse();
+
assertThat(tableConfig.pinL0FilterAndIndexBlocksInCache()).isFalse();
+ assertThat(tableConfig.pinTopLevelIndexAndFilter()).isFalse();
+ }
+
+ // Test with custom values (all true)
+ Configuration customConfig = new Configuration();
+
customConfig.setString(ConfigOptions.KV_CACHE_INDEX_AND_FILTER_BLOCKS.key(),
"true");
+ customConfig.setString(
+
ConfigOptions.KV_CACHE_INDEX_AND_FILTER_BLOCKS_WITH_HIGH_PRIORITY.key(),
"true");
+ customConfig.setString(
+
ConfigOptions.KV_PIN_L0_FILTER_AND_INDEX_BLOCKS_IN_CACHE.key(), "true");
+
customConfig.setString(ConfigOptions.KV_PIN_TOP_LEVEL_INDEX_AND_FILTER.key(),
"true");
+
+ try (RocksDBResourceContainer container =
+ new RocksDBResourceContainer(customConfig, null)) {
+ ColumnFamilyOptions columnOptions = container.getColumnOptions();
+ BlockBasedTableConfig tableConfig =
+ (BlockBasedTableConfig) columnOptions.tableFormatConfig();
+
+ assertThat(tableConfig.cacheIndexAndFilterBlocks()).isTrue();
+
assertThat(tableConfig.cacheIndexAndFilterBlocksWithHighPriority()).isTrue();
+
assertThat(tableConfig.pinL0FilterAndIndexBlocksInCache()).isTrue();
+ assertThat(tableConfig.pinTopLevelIndexAndFilter()).isTrue();
+ }
+ }
}
diff --git a/website/docs/maintenance/configuration.md b/website/docs/maintenance/configuration.md
index 97da3457d..44d6e6278 100644
--- a/website/docs/maintenance/configuration.md
+++ b/website/docs/maintenance/configuration.md
@@ -154,6 +154,10 @@ during the Fluss cluster working.
| kv.rocksdb.writebuffer.number-to-merge | Integer | 1 | The minimum number of write buffers that will be merged together before writing to storage. The default value is `1`. [...]
| kv.rocksdb.block.blocksize | MemorySize | 4kb | The approximate size (in bytes) of user data packed per block. The default blocksize is `4KB`. [...]
| kv.rocksdb.block.cache-size | MemorySize | 8mb | The amount of the cache for data blocks in RocksDB. The default block-cache size is `8MB`. [...]
+| kv.rocksdb.block.cache-index-and-filter-blocks | Boolean | false | If true, index and filter blocks will be stored in block cache, together with all other data blocks. This helps to limit memory usage so that the total memory used by RocksDB is bounded by block cache size. The default value is `false`. [...]
+| kv.rocksdb.block.cache-index-and-filter-blocks-with-high-priority | Boolean | false | If true and cache_index_and_filter_blocks is enabled, index and filter blocks will be stored with high priority in block cache, making them less likely to be evicted than data blocks. The default value is `false`. [...]
+| kv.rocksdb.block.pin-l0-filter-and-index-blocks-in-cache | Boolean | false | If true and cache_index_and_filter_blocks is enabled, L0 index and filter blocks will be pinned in block cache and will not be evicted. This helps avoid performance degradation due to cache misses on L0 index/filter blocks. The default value is `false`. [...]
+| kv.rocksdb.block.pin-top-level-index-and-filter | Boolean | false | If true, the top-level index of partitioned index/filter blocks will be pinned in block cache and will not be evicted. The default value is `false`. [...]
| kv.rocksdb.use-bloom-filter | Boolean | true | If true, every newly created SST file will contain a Bloom filter. It is enabled by default. [...]
| kv.rocksdb.bloom-filter.bits-per-key | Double | 10.0 | Bits per key that bloom filter will use, this only take effect when bloom filter is used. The default value is 10.0. [...]
| kv.rocksdb.bloom-filter.block-based-mode | Boolean | false | If true, RocksDB will use block-based filter instead of full filter, this only take effect when bloom filter is used. The default value is `false`. [...]