This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 99a6dc00a0b [Improve](cache) Estimated column reader memory to control 
segment cache (#34526)
99a6dc00a0b is described below

commit 99a6dc00a0bff219a764d5bf4d71ccac5b4557da
Author: Lightman <[email protected]>
AuthorDate: Wed May 8 21:34:36 2024 +0800

    [Improve](cache) Estimated column reader memory to control segment cache 
(#34526)
---
 be/src/common/config.cpp                  |  4 ++++
 be/src/common/config.h                    |  3 +++
 be/src/olap/rowset/segment_v2/segment.cpp |  1 +
 be/src/olap/rowset/segment_v2/segment.h   |  6 ++++--
 be/src/olap/segment_loader.cpp            |  5 +++--
 be/src/olap/segment_loader.h              |  2 +-
 be/src/runtime/exec_env_init.cpp          | 13 +++++++++++--
 7 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 5f2d12d740f..9c3b67d2656 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1053,6 +1053,10 @@ DEFINE_mInt32(schema_cache_sweep_time_sec, "100");
 
 // max number of segment cache, default -1 for backward compatibility 
fd_number*2/5
 DEFINE_mInt32(segment_cache_capacity, "-1");
+DEFINE_mInt32(estimated_num_columns_per_segment, "30");
+DEFINE_mInt32(estimated_mem_per_column_reader, "1024");
+// The value is calculated by storage_page_cache_limit * 
index_page_cache_percentage
+DEFINE_mInt32(segment_cache_memory_percentage, "2");
 
 // enable feature binlog, default false
 DEFINE_Bool(enable_feature_binlog, "false");
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 73037ac10e9..f758236975e 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1099,6 +1099,9 @@ DECLARE_mInt32(schema_cache_sweep_time_sec);
 
 // max number of segment cache
 DECLARE_mInt32(segment_cache_capacity);
+DECLARE_mInt32(estimated_num_columns_per_segment);
+DECLARE_mInt32(estimated_mem_per_column_reader);
+DECLARE_Int32(segment_cache_memory_percentage);
 
 // enable binlog
 DECLARE_Bool(enable_feature_binlog);
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp 
b/be/src/olap/rowset/segment_v2/segment.cpp
index 9d7b573ca98..51539462293 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -408,6 +408,7 @@ Status Segment::_create_column_readers(const 
SegmentFooterPB& footer) {
         RETURN_IF_ERROR(ColumnReader::create(opts, 
footer.columns(iter->second), footer.num_rows(),
                                              _file_reader, &reader));
         _column_readers.emplace(column.unique_id(), std::move(reader));
+        _meta_mem_usage += config::estimated_mem_per_column_reader;
     }
 
     // init by column path
diff --git a/be/src/olap/rowset/segment_v2/segment.h 
b/be/src/olap/rowset/segment_v2/segment.h
index 8a6a2d37c00..b6571209bac 100644
--- a/be/src/olap/rowset/segment_v2/segment.h
+++ b/be/src/olap/rowset/segment_v2/segment.h
@@ -185,6 +185,8 @@ public:
         return safe;
     }
 
+    const TabletSchemaSPtr& tablet_schema() { return _tablet_schema; }
+
 private:
     DISALLOW_COPY_AND_ASSIGN(Segment);
     Segment(uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr 
tablet_schema);
@@ -211,11 +213,11 @@ private:
     uint32_t _segment_id;
     uint32_t _num_rows;
 
-    // only for tracking memory use by segment meta data such as footer or 
index page.
+    // 1. Tracking memory use by segment meta data such as footer or index 
page.
+    // 2. Tracking memory use by segment column reader
     // The memory consumed by querying is tracked in segment iterator.
     // TODO: Segment::_meta_mem_usage Unknown value overflow, causes the value 
of SegmentMeta mem tracker
     // is similar to `-2912341218700198079`. So, temporarily put it in 
experimental type tracker.
-    // maybe have to use ColumnReader count as segment meta size.
     int64_t _meta_mem_usage;
 
     RowsetId _rowset_id;
diff --git a/be/src/olap/segment_loader.cpp b/be/src/olap/segment_loader.cpp
index f6ca7b72a2a..cb076f28225 100644
--- a/be/src/olap/segment_loader.cpp
+++ b/be/src/olap/segment_loader.cpp
@@ -39,8 +39,9 @@ bool SegmentCache::lookup(const SegmentCache::CacheKey& key, 
SegmentCacheHandle*
 
 void SegmentCache::insert(const SegmentCache::CacheKey& key, 
SegmentCache::CacheValue& value,
                           SegmentCacheHandle* handle) {
-    auto* lru_handle = LRUCachePolicy::insert(
-            key.encode(), &value, 1, value.segment->meta_mem_usage(), 
CachePriority::NORMAL);
+    auto* lru_handle =
+            LRUCachePolicy::insert(key.encode(), &value, 
value.segment->meta_mem_usage(),
+                                   value.segment->meta_mem_usage(), 
CachePriority::NORMAL);
     handle->push_segment(this, lru_handle);
 }
 
diff --git a/be/src/olap/segment_loader.h b/be/src/olap/segment_loader.h
index d952fd522d0..00fcbaf46e9 100644
--- a/be/src/olap/segment_loader.h
+++ b/be/src/olap/segment_loader.h
@@ -81,7 +81,7 @@ public:
     };
 
     SegmentCache(size_t capacity)
-            : LRUCachePolicy(CachePolicy::CacheType::SEGMENT_CACHE, capacity, 
LRUCacheType::NUMBER,
+            : LRUCachePolicy(CachePolicy::CacheType::SEGMENT_CACHE, capacity, 
LRUCacheType::SIZE,
                              config::tablet_rowset_stale_sweep_time_sec) {}
 
     // Lookup the given segment in the cache.
diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp
index 818df8de22c..ba0473ee10c 100644
--- a/be/src/runtime/exec_env_init.cpp
+++ b/be/src/runtime/exec_env_init.cpp
@@ -486,9 +486,18 @@ Status ExecEnv::_init_mem_env() {
     if (segment_cache_capacity < 0 || segment_cache_capacity > fd_number * 2 / 
5) {
         segment_cache_capacity = fd_number * 2 / 5;
     }
+
+    int64_t segment_cache_mem_limit =
+            MemInfo::mem_limit() / 100 * 
config::segment_cache_memory_percentage;
+    // config::segment_cache_memory_percentage;
+    int64_t min_segment_cache_mem_limit =
+            min(segment_cache_mem_limit, segment_cache_capacity *
+                                                 
config::estimated_num_columns_per_segment *
+                                                 
config::estimated_mem_per_column_reader);
+    _segment_loader = new SegmentLoader(min_segment_cache_mem_limit);
     LOG(INFO) << "segment_cache_capacity <= fd_number * 2 / 5, fd_number: " << 
fd_number
-              << " segment_cache_capacity: " << segment_cache_capacity;
-    _segment_loader = new SegmentLoader(segment_cache_capacity);
+              << " segment_cache_capacity: " << segment_cache_capacity
+              << " min_segment_cache_mem_limit " << 
min_segment_cache_mem_limit;
 
     _schema_cache = new SchemaCache(config::schema_cache_capacity);
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to