This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 3db16ad8b27 branch-3.1: [fix](segment) fix heap use after free for
VariantColumnReader and make _get_segment_footer thread safe in
ColumnReaderCache (#53872)
3db16ad8b27 is described below
commit 3db16ad8b27594b81d260c47c61797d79d2b69e3
Author: lihangyu <[email protected]>
AuthorDate: Fri Jul 25 10:22:55 2025 +0800
branch-3.1: [fix](segment) fix heap use after free for VariantColumnReader
and make _get_segment_footer thread safe in ColumnReaderCache (#53872)
---
be/src/common/config.cpp | 2 ++
.../olap/rowset/segment_v2/column_reader_cache.cpp | 24 +++++++++++++++++-----
.../olap/rowset/segment_v2/column_reader_cache.h | 2 +-
be/src/olap/rowset/segment_v2/segment.cpp | 1 +
be/src/olap/rowset/segment_v2/segment.h | 2 +-
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 3 ++-
.../variant_column_writer_reader_test.cpp | 1 +
7 files changed, 27 insertions(+), 8 deletions(-)
diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 11641420d09..3a37d43756d 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1996,6 +1996,8 @@ Status set_fuzzy_configs() {
((distribution(*generator) % 2) == 0) ? "true" : "false";
fuzzy_field_and_value["string_overflow_size"] =
((distribution(*generator) % 2) == 0) ? "10" : "4294967295";
+ fuzzy_field_and_value["max_segment_partial_column_cache_size"] =
+ ((distribution(*generator) % 2) == 0) ? "5" : "10";
std::uniform_int_distribution<int64_t> distribution2(-2, 10);
fuzzy_field_and_value["segments_key_bounds_truncation_threshold"] =
diff --git a/be/src/olap/rowset/segment_v2/column_reader_cache.cpp
b/be/src/olap/rowset/segment_v2/column_reader_cache.cpp
index 5c3fa9d0157..30b6e9528f5 100644
--- a/be/src/olap/rowset/segment_v2/column_reader_cache.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader_cache.cpp
@@ -29,6 +29,10 @@ namespace doris::segment_v2 {
ColumnReaderCache::ColumnReaderCache(Segment* segment) : _segment(segment) {}
+ColumnReaderCache::~ColumnReaderCache() {
+ g_segment_column_cache_count << -_cache_map.size();
+}
+
std::shared_ptr<ColumnReader> ColumnReaderCache::_lookup(const
ColumnReaderCacheKey& key) {
std::lock_guard<std::mutex> lock(_cache_mutex);
auto it = _cache_map.find(key);
@@ -73,11 +77,13 @@ Status ColumnReaderCache::_insert(const
ColumnReaderCacheKey& key, const ColumnR
g_segment_column_cache_count << 1;
DCHECK(key.first >= 0) << " col_uid: " << key.first
<< " relative_path: " << key.second.get_path();
- VLOG_DEBUG << "insert cache: " << key.first << " " << key.second.get_path()
- << ", type: " << (int)reader_ptr->get_meta_type();
_lru_list.push_front(CacheNode {key, reader_ptr,
std::chrono::steady_clock::now()});
_cache_map[key] = _lru_list.begin();
*column_reader = reader_ptr;
+ VLOG_DEBUG << "insert cache: " << key.first << " " << key.second.get_path()
+ << ", type: " << (int)reader_ptr->get_meta_type()
+ << ", cache_size: " << _cache_map.size() << ", list_size: " <<
_lru_list.size()
+ << ", cache_map: " << _cache_map.size() << ", lru_list: " <<
_lru_list.size();
return Status::OK();
}
@@ -108,7 +114,11 @@ Status ColumnReaderCache::get_column_reader(int32_t
col_uid,
return Status::OK();
}
std::shared_ptr<SegmentFooterPB> footer_pb_shared;
- RETURN_IF_ERROR(_segment->_get_segment_footer(footer_pb_shared, stats));
+ {
+ std::lock_guard<std::mutex> lock(_cache_mutex);
+ // keep the lock until the footer is loaded, since _get_segment_footer
is not thread safe
+ RETURN_IF_ERROR(_segment->_get_segment_footer(footer_pb_shared,
stats));
+ }
// lazy create column reader from footer
const auto& col_footer_pb = footer_pb_shared->columns(it->second);
ColumnReaderOptions opts {
@@ -135,8 +145,8 @@ Status ColumnReaderCache::get_path_column_reader(uint32_t
col_uid,
return Status::OK();
}
const SubcolumnColumnMetaInfo::Node* node = node_hint;
+ std::shared_ptr<ColumnReader> variant_column_reader;
if (node == nullptr) {
- std::shared_ptr<ColumnReader> variant_column_reader;
RETURN_IF_ERROR(get_column_reader(col_uid, &variant_column_reader,
stats));
node = variant_column_reader
?
static_cast<VariantColumnReader*>(variant_column_reader.get())
@@ -147,7 +157,11 @@ Status ColumnReaderCache::get_path_column_reader(uint32_t
col_uid,
// lazy create column reader from footer
DCHECK_GE(node->data.footer_ordinal, 0);
std::shared_ptr<SegmentFooterPB> footer_pb_shared;
- RETURN_IF_ERROR(_segment->_get_segment_footer(footer_pb_shared,
stats));
+ {
+ std::lock_guard<std::mutex> lock(_cache_mutex);
+ // keep the lock until the footer is loaded, since
_get_segment_footer is not thread safe
+ RETURN_IF_ERROR(_segment->_get_segment_footer(footer_pb_shared,
stats));
+ }
const auto& col_footer_pb =
footer_pb_shared->columns(node->data.footer_ordinal);
ColumnReaderOptions opts {
.kept_in_memory = _segment->tablet_schema()->is_in_memory(),
diff --git a/be/src/olap/rowset/segment_v2/column_reader_cache.h
b/be/src/olap/rowset/segment_v2/column_reader_cache.h
index b00c1f6c87e..ce4060a5f26 100644
--- a/be/src/olap/rowset/segment_v2/column_reader_cache.h
+++ b/be/src/olap/rowset/segment_v2/column_reader_cache.h
@@ -41,7 +41,7 @@ struct CacheNode {
class ColumnReaderCache {
public:
explicit ColumnReaderCache(Segment* segment);
- virtual ~ColumnReaderCache() = default;
+ virtual ~ColumnReaderCache();
// Get all available readers
// if include_subcolumns is true, return all available readers, including
subcolumn readers
// otherwise, return only none variant subcolumn readers
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp
b/be/src/olap/rowset/segment_v2/segment.cpp
index 7c362a64bb9..7bd7c978722 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -43,6 +43,7 @@
#include "olap/primary_key_index.h"
#include "olap/rowset/rowset_reader_context.h"
#include "olap/rowset/segment_v2/column_reader.h"
+#include "olap/rowset/segment_v2/column_reader_cache.h"
#include "olap/rowset/segment_v2/empty_segment_iterator.h"
#include "olap/rowset/segment_v2/indexed_column_reader.h"
#include "olap/rowset/segment_v2/inverted_index_file_reader.h"
diff --git a/be/src/olap/rowset/segment_v2/segment.h
b/be/src/olap/rowset/segment_v2/segment.h
index f99184071b7..d6d516b83e8 100644
--- a/be/src/olap/rowset/segment_v2/segment.h
+++ b/be/src/olap/rowset/segment_v2/segment.h
@@ -37,7 +37,6 @@
#include "olap/olap_common.h"
#include "olap/page_cache.h"
#include "olap/rowset/segment_v2/column_reader.h" // ColumnReader
-#include "olap/rowset/segment_v2/column_reader_cache.h"
#include "olap/rowset/segment_v2/page_handle.h"
#include "olap/schema.h"
#include "olap/tablet_schema.h"
@@ -68,6 +67,7 @@ class Segment;
class InvertedIndexIterator;
class InvertedIndexFileReader;
struct VariantStatistics;
+class ColumnReaderCache;
using SegmentSharedPtr = std::shared_ptr<Segment>;
// A Segment is used to represent a segment in memory format. When segment is
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index c1a70d56f10..facfba00027 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -50,6 +50,7 @@
#include "olap/primary_key_index.h"
#include "olap/rowset/segment_v2/bitmap_index_reader.h"
#include "olap/rowset/segment_v2/column_reader.h"
+#include "olap/rowset/segment_v2/column_reader_cache.h"
#include "olap/rowset/segment_v2/indexed_column_reader.h"
#include "olap/rowset/segment_v2/inverted_index_file_reader.h"
#include "olap/rowset/segment_v2/inverted_index_reader.h"
@@ -1089,8 +1090,8 @@ Status SegmentIterator::_init_inverted_index_iterators() {
const auto& column = _opts.tablet_schema->column(cid);
std::vector<const TabletIndex*> inverted_indexs;
// If the column is an extracted column, we need to find the
sub-column in the parent column reader.
+ std::shared_ptr<ColumnReader> column_reader;
if (column.is_extracted_column()) {
- std::shared_ptr<ColumnReader> column_reader;
if (!_segment->_column_reader_cache->get_column_reader(
column.parent_unique_id(), &column_reader,
_opts.stats) ||
column_reader == nullptr) {
diff --git
a/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp
b/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp
index 464043f250d..ba2eca1926f 100644
--- a/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp
+++ b/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp
@@ -17,6 +17,7 @@
#include "gtest/gtest.h"
#include "olap/rowset/segment_v2/column_reader.h"
+#include "olap/rowset/segment_v2/column_reader_cache.h"
#include "olap/rowset/segment_v2/variant/hierarchical_data_iterator.h"
#include "olap/rowset/segment_v2/variant/sparse_column_extract_iterator.h"
#include "olap/rowset/segment_v2/variant/sparse_column_merge_iterator.h"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]