This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 07278e9dcb6 [improvement](segmentcache) limit segment cache by memory
or segment num (#37026) (#37035)
07278e9dcb6 is described below
commit 07278e9dcb6df6351e90be7a22599aa1132d5ff1
Author: Yongqiang YANG <[email protected]>
AuthorDate: Sun Jun 30 20:34:13 2024 +0800
[improvement](segmentcache) limit segment cache by memory or segment num
(#37026) (#37035)
pick #37026
---
be/src/common/config.cpp | 2 +-
be/src/olap/lru_cache.cpp | 5 +++++
be/src/olap/lru_cache.h | 4 +++-
be/src/olap/segment_loader.h | 11 +++++++----
be/src/runtime/exec_env_init.cpp | 8 ++++----
be/test/testutil/run_all_tests.cpp | 2 +-
6 files changed, 21 insertions(+), 11 deletions(-)
diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 493ad699aac..7a8c63db748 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1052,7 +1052,7 @@ DEFINE_mInt32(schema_cache_sweep_time_sec, "100");
// max number of segment cache, default -1 for backward compatibility
fd_number*2/5
DEFINE_mInt32(segment_cache_capacity, "-1");
-DEFINE_mInt32(estimated_num_columns_per_segment, "30");
+DEFINE_mInt32(estimated_num_columns_per_segment, "200");
DEFINE_mInt32(estimated_mem_per_column_reader, "1024");
// The value is calculate by storage_page_cache_limit *
index_page_cache_percentage
DEFINE_mInt32(segment_cache_memory_percentage, "2");
diff --git a/be/src/olap/lru_cache.cpp b/be/src/olap/lru_cache.cpp
index 031082f6da8..741c2423915 100644
--- a/be/src/olap/lru_cache.cpp
+++ b/be/src/olap/lru_cache.cpp
@@ -22,6 +22,7 @@ namespace doris {
DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(cache_capacity, MetricUnit::BYTES);
DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(cache_usage, MetricUnit::BYTES);
+DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(cache_element_count, MetricUnit::NOUNIT);
DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(cache_usage_ratio, MetricUnit::NOUNIT);
DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(cache_lookup_count,
MetricUnit::OPERATIONS);
DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(cache_hit_count, MetricUnit::OPERATIONS);
@@ -542,6 +543,7 @@ ShardedLRUCache::ShardedLRUCache(const std::string& name,
size_t total_capacity,
_entity->register_hook(name,
std::bind(&ShardedLRUCache::update_cache_metrics, this));
INT_GAUGE_METRIC_REGISTER(_entity, cache_capacity);
INT_GAUGE_METRIC_REGISTER(_entity, cache_usage);
+ INT_GAUGE_METRIC_REGISTER(_entity, cache_element_count);
INT_DOUBLE_METRIC_REGISTER(_entity, cache_usage_ratio);
INT_ATOMIC_COUNTER_METRIC_REGISTER(_entity, cache_lookup_count);
INT_ATOMIC_COUNTER_METRIC_REGISTER(_entity, cache_hit_count);
@@ -640,15 +642,18 @@ void ShardedLRUCache::update_cache_metrics() const {
size_t total_usage = 0;
size_t total_lookup_count = 0;
size_t total_hit_count = 0;
+ size_t total_element_count = 0;
for (int i = 0; i < _num_shards; i++) {
total_capacity += _shards[i]->get_capacity();
total_usage += _shards[i]->get_usage();
total_lookup_count += _shards[i]->get_lookup_count();
total_hit_count += _shards[i]->get_hit_count();
+ total_element_count += _shards[i]->get_element_count();
}
cache_capacity->set_value(total_capacity);
cache_usage->set_value(total_usage);
+ cache_element_count->set_value(total_element_count);
cache_lookup_count->set_value(total_lookup_count);
cache_hit_count->set_value(total_hit_count);
cache_usage_ratio->set_value(total_capacity == 0 ? 0 :
((double)total_usage / total_capacity));
diff --git a/be/src/olap/lru_cache.h b/be/src/olap/lru_cache.h
index 50676921044..059020deab5 100644
--- a/be/src/olap/lru_cache.h
+++ b/be/src/olap/lru_cache.h
@@ -60,7 +60,7 @@ enum LRUCacheType {
};
static constexpr LRUCacheType DEFAULT_LRU_CACHE_TYPE = LRUCacheType::SIZE;
-static constexpr uint32_t DEFAULT_LRU_CACHE_NUM_SHARDS = 16;
+static constexpr uint32_t DEFAULT_LRU_CACHE_NUM_SHARDS = 32;
static constexpr size_t DEFAULT_LRU_CACHE_ELEMENT_COUNT_CAPACITY = 0;
class CacheKey {
@@ -349,6 +349,7 @@ public:
uint64_t get_hit_count() const { return _hit_count; }
size_t get_usage() const { return _usage; }
size_t get_capacity() const { return _capacity; }
+ size_t get_element_count() const { return _table.element_count(); }
private:
void _lru_remove(LRUHandle* e);
@@ -433,6 +434,7 @@ private:
std::shared_ptr<MetricEntity> _entity;
IntGauge* cache_capacity = nullptr;
IntGauge* cache_usage = nullptr;
+ IntGauge* cache_element_count = nullptr;
DoubleGauge* cache_usage_ratio = nullptr;
IntAtomicCounter* cache_lookup_count = nullptr;
IntAtomicCounter* cache_hit_count = nullptr;
diff --git a/be/src/olap/segment_loader.h b/be/src/olap/segment_loader.h
index cd0f7799abe..660fd3db189 100644
--- a/be/src/olap/segment_loader.h
+++ b/be/src/olap/segment_loader.h
@@ -80,9 +80,10 @@ public:
segment_v2::SegmentSharedPtr segment;
};
- SegmentCache(size_t capacity)
- : LRUCachePolicy(CachePolicy::CacheType::SEGMENT_CACHE, capacity,
LRUCacheType::SIZE,
- config::tablet_rowset_stale_sweep_time_sec) {}
+ SegmentCache(size_t memory_bytes_limit, size_t segment_num_limit)
+ : LRUCachePolicy(CachePolicy::CacheType::SEGMENT_CACHE,
memory_bytes_limit,
+ LRUCacheType::SIZE,
config::tablet_rowset_stale_sweep_time_sec,
+ DEFAULT_LRU_CACHE_NUM_SHARDS * 2,
segment_num_limit) {}
// Lookup the given segment in the cache.
// If the segment is found, the cache entry will be written into handle.
@@ -109,7 +110,9 @@ public:
// After the estimation of segment memory usage is provided later, it is
recommended
// to use Memory as the capacity limit of the cache.
- SegmentLoader(size_t capacity) { _segment_cache =
std::make_unique<SegmentCache>(capacity); }
+ SegmentLoader(size_t memory_limit_bytes, size_t segment_num_count) {
+ _segment_cache = std::make_unique<SegmentCache>(memory_limit_bytes,
segment_num_count);
+ }
// Load segments of "rowset", return the "cache_handle" which contains
segments.
// If use_cache is true, it will be loaded from _cache.
diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp
index e303f4720d2..d9e21e1603b 100644
--- a/be/src/runtime/exec_env_init.cpp
+++ b/be/src/runtime/exec_env_init.cpp
@@ -450,8 +450,8 @@ Status ExecEnv::_init_mem_env() {
// SegmentLoader caches segments in rowset granularity. So the size of
// opened files will greater than segment_cache_capacity.
int64_t segment_cache_capacity = config::segment_cache_capacity;
- if (segment_cache_capacity < 0 || segment_cache_capacity > fd_number * 2 /
5) {
- segment_cache_capacity = fd_number * 2 / 5;
+ if (segment_cache_capacity < 0 || segment_cache_capacity > fd_number * 1 /
5) {
+ segment_cache_capacity = fd_number * 1 / 5;
}
int64_t segment_cache_mem_limit =
@@ -461,8 +461,8 @@ Status ExecEnv::_init_mem_env() {
min(segment_cache_mem_limit, segment_cache_capacity *
config::estimated_num_columns_per_segment *
config::estimated_mem_per_column_reader);
- _segment_loader = new SegmentLoader(min_segment_cache_mem_limit);
- LOG(INFO) << "segment_cache_capacity <= fd_number * 2 / 5, fd_number: " <<
fd_number
+ _segment_loader = new SegmentLoader(min_segment_cache_mem_limit,
segment_cache_capacity);
+ LOG(INFO) << "segment_cache_capacity <= fd_number * 1 / 5, fd_number: " <<
fd_number
<< " segment_cache_capacity: " << segment_cache_capacity
<< " min_segment_cache_mem_limit " <<
min_segment_cache_mem_limit;
diff --git a/be/test/testutil/run_all_tests.cpp
b/be/test/testutil/run_all_tests.cpp
index f67db887e80..de088f8d17b 100644
--- a/be/test/testutil/run_all_tests.cpp
+++ b/be/test/testutil/run_all_tests.cpp
@@ -50,7 +50,7 @@ int main(int argc, char** argv) {
doris::ExecEnv::GetInstance()->set_dummy_lru_cache(std::make_shared<doris::DummyLRUCache>());
doris::ExecEnv::GetInstance()->set_storage_page_cache(
doris::StoragePageCache::create_global_cache(1 << 30, 10, 0));
- doris::ExecEnv::GetInstance()->set_segment_loader(new
doris::SegmentLoader(1000));
+ doris::ExecEnv::GetInstance()->set_segment_loader(new
doris::SegmentLoader(1000, 1000));
std::string conf = std::string(getenv("DORIS_HOME")) + "/conf/be.conf";
auto st = doris::config::init(conf.c_str(), false);
doris::ExecEnv::GetInstance()->set_tablet_schema_cache(
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]