This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 3db16ad8b27 branch-3.1: [fix](segment) fix heap use after free for 
VariantColumnReader and make _get_segment_footer thread safe in 
ColumnReaderCache (#53872)
3db16ad8b27 is described below

commit 3db16ad8b27594b81d260c47c61797d79d2b69e3
Author: lihangyu <[email protected]>
AuthorDate: Fri Jul 25 10:22:55 2025 +0800

    branch-3.1: [fix](segment) fix heap use after free for VariantColumnReader 
and make _get_segment_footer thread safe in ColumnReaderCache (#53872)
---
 be/src/common/config.cpp                           |  2 ++
 .../olap/rowset/segment_v2/column_reader_cache.cpp | 24 +++++++++++++++++-----
 .../olap/rowset/segment_v2/column_reader_cache.h   |  2 +-
 be/src/olap/rowset/segment_v2/segment.cpp          |  1 +
 be/src/olap/rowset/segment_v2/segment.h            |  2 +-
 be/src/olap/rowset/segment_v2/segment_iterator.cpp |  3 ++-
 .../variant_column_writer_reader_test.cpp          |  1 +
 7 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 11641420d09..3a37d43756d 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1996,6 +1996,8 @@ Status set_fuzzy_configs() {
             ((distribution(*generator) % 2) == 0) ? "true" : "false";
     fuzzy_field_and_value["string_overflow_size"] =
             ((distribution(*generator) % 2) == 0) ? "10" : "4294967295";
+    fuzzy_field_and_value["max_segment_partial_column_cache_size"] =
+            ((distribution(*generator) % 2) == 0) ? "5" : "10";
 
     std::uniform_int_distribution<int64_t> distribution2(-2, 10);
     fuzzy_field_and_value["segments_key_bounds_truncation_threshold"] =
diff --git a/be/src/olap/rowset/segment_v2/column_reader_cache.cpp 
b/be/src/olap/rowset/segment_v2/column_reader_cache.cpp
index 5c3fa9d0157..30b6e9528f5 100644
--- a/be/src/olap/rowset/segment_v2/column_reader_cache.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader_cache.cpp
@@ -29,6 +29,10 @@ namespace doris::segment_v2 {
 
 ColumnReaderCache::ColumnReaderCache(Segment* segment) : _segment(segment) {}
 
+ColumnReaderCache::~ColumnReaderCache() {
+    g_segment_column_cache_count << -_cache_map.size();
+}
+
 std::shared_ptr<ColumnReader> ColumnReaderCache::_lookup(const 
ColumnReaderCacheKey& key) {
     std::lock_guard<std::mutex> lock(_cache_mutex);
     auto it = _cache_map.find(key);
@@ -73,11 +77,13 @@ Status ColumnReaderCache::_insert(const 
ColumnReaderCacheKey& key, const ColumnR
     g_segment_column_cache_count << 1;
     DCHECK(key.first >= 0) << " col_uid: " << key.first
                            << " relative_path: " << key.second.get_path();
-    VLOG_DEBUG << "insert cache: " << key.first << " " << key.second.get_path()
-               << ", type: " << (int)reader_ptr->get_meta_type();
     _lru_list.push_front(CacheNode {key, reader_ptr, 
std::chrono::steady_clock::now()});
     _cache_map[key] = _lru_list.begin();
     *column_reader = reader_ptr;
+    VLOG_DEBUG << "insert cache: " << key.first << " " << key.second.get_path()
+               << ", type: " << (int)reader_ptr->get_meta_type()
+               << ", cache_size: " << _cache_map.size() << ", list_size: " << 
_lru_list.size()
+               << ", cache_map: " << _cache_map.size() << ", lru_list: " << 
_lru_list.size();
     return Status::OK();
 }
 
@@ -108,7 +114,11 @@ Status ColumnReaderCache::get_column_reader(int32_t 
col_uid,
         return Status::OK();
     }
     std::shared_ptr<SegmentFooterPB> footer_pb_shared;
-    RETURN_IF_ERROR(_segment->_get_segment_footer(footer_pb_shared, stats));
+    {
+        std::lock_guard<std::mutex> lock(_cache_mutex);
+        // keep the lock until the footer is loaded, since _get_segment_footer 
is not thread safe
+        RETURN_IF_ERROR(_segment->_get_segment_footer(footer_pb_shared, 
stats));
+    }
     // lazy create column reader from footer
     const auto& col_footer_pb = footer_pb_shared->columns(it->second);
     ColumnReaderOptions opts {
@@ -135,8 +145,8 @@ Status ColumnReaderCache::get_path_column_reader(uint32_t 
col_uid,
         return Status::OK();
     }
     const SubcolumnColumnMetaInfo::Node* node = node_hint;
+    std::shared_ptr<ColumnReader> variant_column_reader;
     if (node == nullptr) {
-        std::shared_ptr<ColumnReader> variant_column_reader;
         RETURN_IF_ERROR(get_column_reader(col_uid, &variant_column_reader, 
stats));
         node = variant_column_reader
                        ? 
static_cast<VariantColumnReader*>(variant_column_reader.get())
@@ -147,7 +157,11 @@ Status ColumnReaderCache::get_path_column_reader(uint32_t 
col_uid,
         // lazy create column reader from footer
         DCHECK_GE(node->data.footer_ordinal, 0);
         std::shared_ptr<SegmentFooterPB> footer_pb_shared;
-        RETURN_IF_ERROR(_segment->_get_segment_footer(footer_pb_shared, 
stats));
+        {
+            std::lock_guard<std::mutex> lock(_cache_mutex);
+            // keep the lock until the footer is loaded, since 
_get_segment_footer is not thread safe
+            RETURN_IF_ERROR(_segment->_get_segment_footer(footer_pb_shared, 
stats));
+        }
         const auto& col_footer_pb = 
footer_pb_shared->columns(node->data.footer_ordinal);
         ColumnReaderOptions opts {
                 .kept_in_memory = _segment->tablet_schema()->is_in_memory(),
diff --git a/be/src/olap/rowset/segment_v2/column_reader_cache.h 
b/be/src/olap/rowset/segment_v2/column_reader_cache.h
index b00c1f6c87e..ce4060a5f26 100644
--- a/be/src/olap/rowset/segment_v2/column_reader_cache.h
+++ b/be/src/olap/rowset/segment_v2/column_reader_cache.h
@@ -41,7 +41,7 @@ struct CacheNode {
 class ColumnReaderCache {
 public:
     explicit ColumnReaderCache(Segment* segment);
-    virtual ~ColumnReaderCache() = default;
+    virtual ~ColumnReaderCache();
     // Get all available readers
     // if include_subcolumns is true, return all available readers, including 
subcolumn readers
     // otherwise, return only none variant subcolumn readers
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp 
b/be/src/olap/rowset/segment_v2/segment.cpp
index 7c362a64bb9..7bd7c978722 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -43,6 +43,7 @@
 #include "olap/primary_key_index.h"
 #include "olap/rowset/rowset_reader_context.h"
 #include "olap/rowset/segment_v2/column_reader.h"
+#include "olap/rowset/segment_v2/column_reader_cache.h"
 #include "olap/rowset/segment_v2/empty_segment_iterator.h"
 #include "olap/rowset/segment_v2/indexed_column_reader.h"
 #include "olap/rowset/segment_v2/inverted_index_file_reader.h"
diff --git a/be/src/olap/rowset/segment_v2/segment.h 
b/be/src/olap/rowset/segment_v2/segment.h
index f99184071b7..d6d516b83e8 100644
--- a/be/src/olap/rowset/segment_v2/segment.h
+++ b/be/src/olap/rowset/segment_v2/segment.h
@@ -37,7 +37,6 @@
 #include "olap/olap_common.h"
 #include "olap/page_cache.h"
 #include "olap/rowset/segment_v2/column_reader.h" // ColumnReader
-#include "olap/rowset/segment_v2/column_reader_cache.h"
 #include "olap/rowset/segment_v2/page_handle.h"
 #include "olap/schema.h"
 #include "olap/tablet_schema.h"
@@ -68,6 +67,7 @@ class Segment;
 class InvertedIndexIterator;
 class InvertedIndexFileReader;
 struct VariantStatistics;
+class ColumnReaderCache;
 
 using SegmentSharedPtr = std::shared_ptr<Segment>;
 // A Segment is used to represent a segment in memory format. When segment is
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index c1a70d56f10..facfba00027 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -50,6 +50,7 @@
 #include "olap/primary_key_index.h"
 #include "olap/rowset/segment_v2/bitmap_index_reader.h"
 #include "olap/rowset/segment_v2/column_reader.h"
+#include "olap/rowset/segment_v2/column_reader_cache.h"
 #include "olap/rowset/segment_v2/indexed_column_reader.h"
 #include "olap/rowset/segment_v2/inverted_index_file_reader.h"
 #include "olap/rowset/segment_v2/inverted_index_reader.h"
@@ -1089,8 +1090,8 @@ Status SegmentIterator::_init_inverted_index_iterators() {
             const auto& column = _opts.tablet_schema->column(cid);
             std::vector<const TabletIndex*> inverted_indexs;
             // If the column is an extracted column, we need to find the 
sub-column in the parent column reader.
+            std::shared_ptr<ColumnReader> column_reader;
             if (column.is_extracted_column()) {
-                std::shared_ptr<ColumnReader> column_reader;
                 if (!_segment->_column_reader_cache->get_column_reader(
                             column.parent_unique_id(), &column_reader, 
_opts.stats) ||
                     column_reader == nullptr) {
diff --git 
a/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp 
b/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp
index 464043f250d..ba2eca1926f 100644
--- a/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp
+++ b/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp
@@ -17,6 +17,7 @@
 
 #include "gtest/gtest.h"
 #include "olap/rowset/segment_v2/column_reader.h"
+#include "olap/rowset/segment_v2/column_reader_cache.h"
 #include "olap/rowset/segment_v2/variant/hierarchical_data_iterator.h"
 #include "olap/rowset/segment_v2/variant/sparse_column_extract_iterator.h"
 #include "olap/rowset/segment_v2/variant/sparse_column_merge_iterator.h"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to