This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 99a6dc00a0b [Improve](cache) Estimated column reader memory to control
segment cache (#34526)
99a6dc00a0b is described below
commit 99a6dc00a0bff219a764d5bf4d71ccac5b4557da
Author: Lightman <[email protected]>
AuthorDate: Wed May 8 21:34:36 2024 +0800
[Improve](cache) Estimated column reader memory to control segment cache
(#34526)
---
be/src/common/config.cpp | 4 ++++
be/src/common/config.h | 3 +++
be/src/olap/rowset/segment_v2/segment.cpp | 1 +
be/src/olap/rowset/segment_v2/segment.h | 6 ++++--
be/src/olap/segment_loader.cpp | 5 +++--
be/src/olap/segment_loader.h | 2 +-
be/src/runtime/exec_env_init.cpp | 13 +++++++++++--
7 files changed, 27 insertions(+), 7 deletions(-)
diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 5f2d12d740f..9c3b67d2656 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1053,6 +1053,10 @@ DEFINE_mInt32(schema_cache_sweep_time_sec, "100");
// max number of segment cache, default -1 for backward compatibility
fd_number*2/5
DEFINE_mInt32(segment_cache_capacity, "-1");
+DEFINE_mInt32(estimated_num_columns_per_segment, "30");
+DEFINE_mInt32(estimated_mem_per_column_reader, "1024");
+// The value is calculated by storage_page_cache_limit *
index_page_cache_percentage
+DEFINE_mInt32(segment_cache_memory_percentage, "2");
// enable feature binlog, default false
DEFINE_Bool(enable_feature_binlog, "false");
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 73037ac10e9..f758236975e 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1099,6 +1099,9 @@ DECLARE_mInt32(schema_cache_sweep_time_sec);
// max number of segment cache
DECLARE_mInt32(segment_cache_capacity);
+DECLARE_mInt32(estimated_num_columns_per_segment);
+DECLARE_mInt32(estimated_mem_per_column_reader);
+DECLARE_Int32(segment_cache_memory_percentage);
// enable binlog
DECLARE_Bool(enable_feature_binlog);
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp
b/be/src/olap/rowset/segment_v2/segment.cpp
index 9d7b573ca98..51539462293 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -408,6 +408,7 @@ Status Segment::_create_column_readers(const
SegmentFooterPB& footer) {
RETURN_IF_ERROR(ColumnReader::create(opts,
footer.columns(iter->second), footer.num_rows(),
_file_reader, &reader));
_column_readers.emplace(column.unique_id(), std::move(reader));
+ _meta_mem_usage += config::estimated_mem_per_column_reader;
}
// init by column path
diff --git a/be/src/olap/rowset/segment_v2/segment.h
b/be/src/olap/rowset/segment_v2/segment.h
index 8a6a2d37c00..b6571209bac 100644
--- a/be/src/olap/rowset/segment_v2/segment.h
+++ b/be/src/olap/rowset/segment_v2/segment.h
@@ -185,6 +185,8 @@ public:
return safe;
}
+ const TabletSchemaSPtr& tablet_schema() { return _tablet_schema; }
+
private:
DISALLOW_COPY_AND_ASSIGN(Segment);
Segment(uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr
tablet_schema);
@@ -211,11 +213,11 @@ private:
uint32_t _segment_id;
uint32_t _num_rows;
- // only for tracking memory use by segment meta data such as footer or
index page.
+ // 1. Tracking memory use by segment meta data such as footer or index
page.
+ // 2. Tracking memory use by segment column reader
// The memory consumed by querying is tracked in segment iterator.
// TODO: Segment::_meta_mem_usage Unknown value overflow, causes the value
of SegmentMeta mem tracker
// is similar to `-2912341218700198079`. So, temporarily put it in
experimental type tracker.
- // maybe have to use ColumnReader count as segment meta size.
int64_t _meta_mem_usage;
RowsetId _rowset_id;
diff --git a/be/src/olap/segment_loader.cpp b/be/src/olap/segment_loader.cpp
index f6ca7b72a2a..cb076f28225 100644
--- a/be/src/olap/segment_loader.cpp
+++ b/be/src/olap/segment_loader.cpp
@@ -39,8 +39,9 @@ bool SegmentCache::lookup(const SegmentCache::CacheKey& key,
SegmentCacheHandle*
void SegmentCache::insert(const SegmentCache::CacheKey& key,
SegmentCache::CacheValue& value,
SegmentCacheHandle* handle) {
- auto* lru_handle = LRUCachePolicy::insert(
- key.encode(), &value, 1, value.segment->meta_mem_usage(),
CachePriority::NORMAL);
+ auto* lru_handle =
+ LRUCachePolicy::insert(key.encode(), &value,
value.segment->meta_mem_usage(),
+ value.segment->meta_mem_usage(),
CachePriority::NORMAL);
handle->push_segment(this, lru_handle);
}
diff --git a/be/src/olap/segment_loader.h b/be/src/olap/segment_loader.h
index d952fd522d0..00fcbaf46e9 100644
--- a/be/src/olap/segment_loader.h
+++ b/be/src/olap/segment_loader.h
@@ -81,7 +81,7 @@ public:
};
SegmentCache(size_t capacity)
- : LRUCachePolicy(CachePolicy::CacheType::SEGMENT_CACHE, capacity,
LRUCacheType::NUMBER,
+ : LRUCachePolicy(CachePolicy::CacheType::SEGMENT_CACHE, capacity,
LRUCacheType::SIZE,
config::tablet_rowset_stale_sweep_time_sec) {}
// Lookup the given segment in the cache.
diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp
index 818df8de22c..ba0473ee10c 100644
--- a/be/src/runtime/exec_env_init.cpp
+++ b/be/src/runtime/exec_env_init.cpp
@@ -486,9 +486,18 @@ Status ExecEnv::_init_mem_env() {
if (segment_cache_capacity < 0 || segment_cache_capacity > fd_number * 2 /
5) {
segment_cache_capacity = fd_number * 2 / 5;
}
+
+ int64_t segment_cache_mem_limit =
+ MemInfo::mem_limit() / 100 *
config::segment_cache_memory_percentage;
+ // config::segment_cache_memory_percentage;
+ int64_t min_segment_cache_mem_limit =
+ min(segment_cache_mem_limit, segment_cache_capacity *
+
config::estimated_num_columns_per_segment *
+
config::estimated_mem_per_column_reader);
+ _segment_loader = new SegmentLoader(min_segment_cache_mem_limit);
LOG(INFO) << "segment_cache_capacity <= fd_number * 2 / 5, fd_number: " <<
fd_number
- << " segment_cache_capacity: " << segment_cache_capacity;
- _segment_loader = new SegmentLoader(segment_cache_capacity);
+ << " segment_cache_capacity: " << segment_cache_capacity
+ << " min_segment_cache_mem_limit " <<
min_segment_cache_mem_limit;
_schema_cache = new SchemaCache(config::schema_cache_capacity);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]