This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 093fe354c8db1ae89955f33e5a33a0fd208a64e8
Author: Lightman <[email protected]>
AuthorDate: Wed May 8 21:34:36 2024 +0800

    [Improve](cache) Estimated column reader memory to control segment cache (#34526)
---
 be/src/common/config.cpp                  |  4 ++++
 be/src/common/config.h                    |  3 +++
 be/src/olap/rowset/segment_v2/segment.cpp |  1 +
 be/src/olap/rowset/segment_v2/segment.h   |  6 ++++--
 be/src/olap/segment_loader.cpp            |  5 +++--
 be/src/olap/segment_loader.h              |  2 +-
 be/src/runtime/exec_env_init.cpp          | 13 +++++++++++--
 7 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 2e0818c6f51..02412e9111c 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1039,6 +1039,10 @@ DEFINE_mInt32(schema_cache_sweep_time_sec, "100");
 
 // max number of segment cache, default -1 for backward compatibility fd_number*2/5
 DEFINE_mInt32(segment_cache_capacity, "-1");
+DEFINE_mInt32(estimated_num_columns_per_segment, "30");
+DEFINE_mInt32(estimated_mem_per_column_reader, "1024");
+// The value is calculate by storage_page_cache_limit * index_page_cache_percentage
+DEFINE_mInt32(segment_cache_memory_percentage, "2");
 
 // enable feature binlog, default false
 DEFINE_Bool(enable_feature_binlog, "false");
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 02d55ed81e2..403454abbb1 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1078,6 +1078,9 @@ DECLARE_mInt32(schema_cache_sweep_time_sec);
 
 // max number of segment cache
 DECLARE_mInt32(segment_cache_capacity);
+DECLARE_mInt32(estimated_num_columns_per_segment);
+DECLARE_mInt32(estimated_mem_per_column_reader);
+DECLARE_Int32(segment_cache_memory_percentage);
 
 // enable binlog
 DECLARE_Bool(enable_feature_binlog);
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp
index ac222ed088c..bed5b53145b 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -410,6 +410,7 @@ Status Segment::_create_column_readers(const SegmentFooterPB& footer) {
         RETURN_IF_ERROR(ColumnReader::create(opts, footer.columns(iter->second), footer.num_rows(),
                                              _file_reader, &reader));
         _column_readers.emplace(column.unique_id(), std::move(reader));
+        _meta_mem_usage += config::estimated_mem_per_column_reader;
     }
 
     // init by column path
diff --git a/be/src/olap/rowset/segment_v2/segment.h b/be/src/olap/rowset/segment_v2/segment.h
index 819a28648ad..e97a0389e21 100644
--- a/be/src/olap/rowset/segment_v2/segment.h
+++ b/be/src/olap/rowset/segment_v2/segment.h
@@ -185,6 +185,8 @@ public:
         return safe;
     }
 
+    const TabletSchemaSPtr& tablet_schema() { return _tablet_schema; }
+
 private:
     DISALLOW_COPY_AND_ASSIGN(Segment);
     Segment(uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr tablet_schema);
@@ -210,11 +212,11 @@ private:
     uint32_t _segment_id;
     uint32_t _num_rows;
 
-    // only for tracking memory use by segment meta data such as footer or index page.
+    // 1. Tracking memory use by segment meta data such as footer or index page.
+    // 2. Tracking memory use by segment column reader
     // The memory consumed by querying is tracked in segment iterator.
     // TODO: Segment::_meta_mem_usage Unknown value overflow, causes the value of SegmentMeta mem tracker
     // is similar to `-2912341218700198079`. So, temporarily put it in experimental type tracker.
-    // maybe have to use ColumnReader count as segment meta size.
     int64_t _meta_mem_usage;
 
     RowsetId _rowset_id;
diff --git a/be/src/olap/segment_loader.cpp b/be/src/olap/segment_loader.cpp
index f6ca7b72a2a..cb076f28225 100644
--- a/be/src/olap/segment_loader.cpp
+++ b/be/src/olap/segment_loader.cpp
@@ -39,8 +39,9 @@ bool SegmentCache::lookup(const SegmentCache::CacheKey& key, SegmentCacheHandle*
 
 void SegmentCache::insert(const SegmentCache::CacheKey& key, SegmentCache::CacheValue& value,
                           SegmentCacheHandle* handle) {
-    auto* lru_handle = LRUCachePolicy::insert(
-            key.encode(), &value, 1, value.segment->meta_mem_usage(), CachePriority::NORMAL);
+    auto* lru_handle =
+            LRUCachePolicy::insert(key.encode(), &value, value.segment->meta_mem_usage(),
+                                   value.segment->meta_mem_usage(), CachePriority::NORMAL);
     handle->push_segment(this, lru_handle);
 }
 
diff --git a/be/src/olap/segment_loader.h b/be/src/olap/segment_loader.h
index d952fd522d0..00fcbaf46e9 100644
--- a/be/src/olap/segment_loader.h
+++ b/be/src/olap/segment_loader.h
@@ -81,7 +81,7 @@ public:
     };
 
     SegmentCache(size_t capacity)
-            : LRUCachePolicy(CachePolicy::CacheType::SEGMENT_CACHE, capacity, LRUCacheType::NUMBER,
+            : LRUCachePolicy(CachePolicy::CacheType::SEGMENT_CACHE, capacity, LRUCacheType::SIZE,
                              config::tablet_rowset_stale_sweep_time_sec) {}
 
     // Lookup the given segment in the cache.
diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp
index 816206a1e8e..2757ed456ff 100644
--- a/be/src/runtime/exec_env_init.cpp
+++ b/be/src/runtime/exec_env_init.cpp
@@ -449,9 +449,18 @@ Status ExecEnv::_init_mem_env() {
     if (segment_cache_capacity < 0 || segment_cache_capacity > fd_number * 2 / 5) {
         segment_cache_capacity = fd_number * 2 / 5;
     }
+
+    int64_t segment_cache_mem_limit =
+            MemInfo::mem_limit() / 100 * config::segment_cache_memory_percentage;
+    // config::segment_cache_memory_percentage;
+    int64_t min_segment_cache_mem_limit =
+            min(segment_cache_mem_limit, segment_cache_capacity *
+                                                 config::estimated_num_columns_per_segment *
+                                                 config::estimated_mem_per_column_reader);
+    _segment_loader = new SegmentLoader(min_segment_cache_mem_limit);
     LOG(INFO) << "segment_cache_capacity <= fd_number * 2 / 5, fd_number: " << fd_number
-              << " segment_cache_capacity: " << segment_cache_capacity;
-    _segment_loader = new SegmentLoader(segment_cache_capacity);
+              << " segment_cache_capacity: " << segment_cache_capacity
+              << " min_segment_cache_mem_limit " << min_segment_cache_mem_limit;
 
     _schema_cache = new SchemaCache(config::schema_cache_capacity);
 

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
