This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 21aa3fca7fb [fix](inverted index) catch IO exception to avoid coredump 
in inverted index string reader (#51844)
21aa3fca7fb is described below

commit 21aa3fca7fb8736ebb033b14cf46959f9cdefd8f
Author: airborne12 <[email protected]>
AuthorDate: Wed Jun 18 17:41:09 2025 +0800

    [fix](inverted index) catch IO exception to avoid coredump in inverted 
index string reader (#51844)
    
    Problem Summary:
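    This PR adds error handling around CLucene interactions in the string
    inverted index reader to prevent core dumps on IO failures, and
    introduces unit tests that use mock readers whose handle_searcher_cache
    throws a CLuceneError to verify the error is returned as a Status.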
---
 .../rowset/segment_v2/inverted_index_reader.cpp    |  78 +++++-----
 .../segment_v2/inverted_index_reader_test.cpp      | 169 ++++++++++++++++++++-
 2 files changed, 207 insertions(+), 40 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp 
b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
index cb33c830c3d..ef2047a2bcd 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
@@ -423,34 +423,33 @@ Status StringTypeInvertedIndexReader::query(const 
io::IOContext* io_ctx,
     std::string search_str(search_query->data, act_len);
     VLOG_DEBUG << "begin to query the inverted index from clucene"
                << ", column_name: " << column_name << ", search_str: " << 
search_str;
+    try {
+        auto index_file_key = 
_inverted_index_file_reader->get_index_file_cache_key(&_index_meta);
+        // try to get query bitmap result from cache and return immediately on 
cache hit
+        InvertedIndexQueryCache::CacheKey cache_key {index_file_key, 
column_name, query_type,
+                                                     search_str};
+        auto* cache = InvertedIndexQueryCache::instance();
+        InvertedIndexQueryCacheHandle cache_handler;
+        auto cache_status =
+                handle_query_cache(runtime_state, cache, cache_key, 
&cache_handler, stats, bit_map);
+        if (cache_status.ok()) {
+            return Status::OK();
+        }
 
-    auto index_file_key = 
_inverted_index_file_reader->get_index_file_cache_key(&_index_meta);
-    // try to get query bitmap result from cache and return immediately on 
cache hit
-    InvertedIndexQueryCache::CacheKey cache_key {index_file_key, column_name, 
query_type,
-                                                 search_str};
-    auto* cache = InvertedIndexQueryCache::instance();
-    InvertedIndexQueryCacheHandle cache_handler;
-    auto cache_status =
-            handle_query_cache(runtime_state, cache, cache_key, 
&cache_handler, stats, bit_map);
-    if (cache_status.ok()) {
-        return Status::OK();
-    }
-
-    std::wstring column_name_ws = StringUtil::string_to_wstring(column_name);
+        std::wstring column_name_ws = 
StringUtil::string_to_wstring(column_name);
 
-    InvertedIndexQueryInfo query_info;
-    query_info.field_name = column_name_ws;
-    query_info.terms.emplace_back(search_str);
+        InvertedIndexQueryInfo query_info;
+        query_info.field_name = column_name_ws;
+        query_info.terms.emplace_back(search_str);
 
-    auto result = std::make_shared<roaring::Roaring>();
-    FulltextIndexSearcherPtr* searcher_ptr = nullptr;
-    InvertedIndexCacheHandle inverted_index_cache_handle;
-    RETURN_IF_ERROR(
-            handle_searcher_cache(runtime_state, &inverted_index_cache_handle, 
io_ctx, stats));
-    auto searcher_variant = inverted_index_cache_handle.get_index_searcher();
-    searcher_ptr = std::get_if<FulltextIndexSearcherPtr>(&searcher_variant);
-    if (searcher_ptr != nullptr) {
-        try {
+        auto result = std::make_shared<roaring::Roaring>();
+        FulltextIndexSearcherPtr* searcher_ptr = nullptr;
+        InvertedIndexCacheHandle inverted_index_cache_handle;
+        RETURN_IF_ERROR(
+                handle_searcher_cache(runtime_state, 
&inverted_index_cache_handle, io_ctx, stats));
+        auto searcher_variant = 
inverted_index_cache_handle.get_index_searcher();
+        searcher_ptr = 
std::get_if<FulltextIndexSearcherPtr>(&searcher_variant);
+        if (searcher_ptr != nullptr) {
             switch (query_type) {
             case InvertedIndexQueryType::MATCH_ANY_QUERY:
             case InvertedIndexQueryType::MATCH_ALL_QUERY:
@@ -502,27 +501,28 @@ Status StringTypeInvertedIndexReader::query(const 
io::IOContext* io_ctx,
                 return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
                         "invalid query type when query untokenized inverted 
index");
             }
-        } catch (const CLuceneError& e) {
-            if (is_range_query(query_type) && e.number() == 
CL_ERR_TooManyClauses) {
-                return Status::Error<ErrorCode::INVERTED_INDEX_BYPASS>(
-                        "range query term exceeds limits, try to downgrade 
from inverted index, "
-                        "column "
-                        "name:{}, search_str:{}",
-                        column_name, search_str);
-            } else {
-                return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
-                        "CLuceneError occured, error msg: {}, column name: {}, 
search_str: {}",
-                        e.what(), column_name, search_str);
-            }
         }
-
         // add to cache
         result->runOptimize();
         cache->insert(cache_key, result, &cache_handler);
 
         bit_map = result;
+        return Status::OK();
+    } catch (const CLuceneError& e) {
+        if (is_range_query(query_type) && e.number() == CL_ERR_TooManyClauses) 
{
+            return Status::Error<ErrorCode::INVERTED_INDEX_BYPASS>(
+                    "range query term exceeds limits, try to downgrade from 
inverted index, "
+                    "column "
+                    "name:{}, search_str:{}",
+                    column_name, search_str);
+        } else {
+            LOG(ERROR) << "CLuceneError occurred, error msg: " << e.what()
+                       << ", column name: " << column_name << ", search_str: " 
<< search_str;
+            return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
+                    "CLuceneError occurred, error msg: {}, column name: {}, 
search_str: {}",
+                    e.what(), column_name, search_str);
+        }
     }
-    return Status::OK();
 }
 
 InvertedIndexReaderType StringTypeInvertedIndexReader::type() {
diff --git a/be/test/olap/rowset/segment_v2/inverted_index_reader_test.cpp 
b/be/test/olap/rowset/segment_v2/inverted_index_reader_test.cpp
index 19623d0dfc5..e52c72d3b57 100644
--- a/be/test/olap/rowset/segment_v2/inverted_index_reader_test.cpp
+++ b/be/test/olap/rowset/segment_v2/inverted_index_reader_test.cpp
@@ -1848,6 +1848,163 @@ public:
         }
     }
 
+    class MockStringTypeInvertedIndexReader final : public 
StringTypeInvertedIndexReader {
+    public:
+        static std::shared_ptr<MockStringTypeInvertedIndexReader> 
create_shared(
+                const TabletIndex* idx_meta,
+                std::shared_ptr<InvertedIndexFileReader>& file_reader) {
+            return std::shared_ptr<MockStringTypeInvertedIndexReader>(
+                    new MockStringTypeInvertedIndexReader(idx_meta, 
file_reader));
+        }
+
+    protected:
+        Status handle_searcher_cache(RuntimeState*, InvertedIndexCacheHandle*, 
const io::IOContext*,
+                                     OlapReaderStatistics*) override {
+            CLuceneError err;
+            err.set(CL_ERR_IO, "mock handle_searcher_cache failure");
+            throw err;
+        }
+
+    private:
+        MockStringTypeInvertedIndexReader(const TabletIndex* idx_meta,
+                                          
std::shared_ptr<InvertedIndexFileReader>& file_reader)
+                : StringTypeInvertedIndexReader(idx_meta, file_reader) {}
+    };
+
+    // Mock class for testing tokenized index query exceptions
+    class MockTokenizedStringTypeInvertedIndexReader final : public 
FullTextIndexReader {
+    public:
+        static std::shared_ptr<MockTokenizedStringTypeInvertedIndexReader> 
create_shared(
+                const TabletIndex* idx_meta,
+                std::shared_ptr<InvertedIndexFileReader>& file_reader) {
+            return std::shared_ptr<MockTokenizedStringTypeInvertedIndexReader>(
+                    new MockTokenizedStringTypeInvertedIndexReader(idx_meta, 
file_reader));
+        }
+
+    protected:
+        Status handle_searcher_cache(RuntimeState*, InvertedIndexCacheHandle*, 
const io::IOContext*,
+                                     OlapReaderStatistics*) override {
+            CLuceneError err;
+            err.set(CL_ERR_IO, "mock tokenized index searcher cache failure");
+            throw err;
+        }
+
+    private:
+        MockTokenizedStringTypeInvertedIndexReader(
+                const TabletIndex* idx_meta, 
std::shared_ptr<InvertedIndexFileReader>& file_reader)
+                : FullTextIndexReader(idx_meta, file_reader) {}
+    };
+
+    void test_cache_error_scenarios() {
+        std::string_view rowset_id = "test_handle_searcher_cache_exception";
+        int seg_id = 0;
+        std::vector<Slice> values = {Slice("apple"), Slice("banana")};
+
+        TabletIndex idx_meta;
+        {
+            auto index_meta_pb = std::make_unique<TabletIndexPB>();
+            index_meta_pb->set_index_type(IndexType::INVERTED);
+            index_meta_pb->set_index_id(1);
+            index_meta_pb->set_index_name("test_mock_cache");
+            index_meta_pb->add_col_unique_id(1); // c2
+            idx_meta.init_from_pb(*index_meta_pb);
+        }
+
+        std::string index_path_prefix;
+        prepare_string_index(rowset_id, seg_id, values, &idx_meta, 
&index_path_prefix);
+
+        auto file_reader = std::make_shared<InvertedIndexFileReader>(
+                io::global_local_filesystem(), index_path_prefix, 
InvertedIndexStorageFormatPB::V2);
+        ASSERT_TRUE(file_reader->init().ok());
+
+        auto mock_reader = 
MockStringTypeInvertedIndexReader::create_shared(&idx_meta, file_reader);
+        ASSERT_NE(mock_reader, nullptr);
+
+        io::IOContext io_ctx;
+        OlapReaderStatistics stats;
+        RuntimeState runtime_state;
+        TQueryOptions opts;
+        opts.enable_inverted_index_searcher_cache = true;
+        runtime_state.set_query_options(opts);
+
+        std::shared_ptr<roaring::Roaring> bitmap = 
std::make_shared<roaring::Roaring>();
+        std::string field_name = "1"; // c2 unique_id
+        StringRef query_val(values[0].data, values[0].size);
+
+        Status st = mock_reader->query(&io_ctx, &stats, &runtime_state, 
field_name, &query_val,
+                                       InvertedIndexQueryType::EQUAL_QUERY, 
bitmap);
+
+        EXPECT_FALSE(st.ok());
+        EXPECT_EQ(st.code(), ErrorCode::INVERTED_INDEX_CLUCENE_ERROR);
+    }
+
+    void test_tokenized_index_query_error_scenarios() {
+        std::string_view rowset_id = "test_tokenized_index_query_exception";
+        int seg_id = 0;
+        std::vector<Slice> values = {Slice("Hello world this is a test"),
+                                     Slice("Apache Doris is a modern analytics 
database"),
+                                     Slice("Inverted index provides fast text 
search")};
+
+        TabletIndex idx_meta;
+        {
+            auto index_meta_pb = std::make_unique<TabletIndexPB>();
+            index_meta_pb->set_index_type(IndexType::INVERTED);
+            index_meta_pb->set_index_id(2);
+            index_meta_pb->set_index_name("test_tokenized_mock_cache");
+            index_meta_pb->add_col_unique_id(1); // c2
+
+            // Set tokenized index properties
+            auto* properties = index_meta_pb->mutable_properties();
+            (*properties)[INVERTED_INDEX_PARSER_KEY] = 
INVERTED_INDEX_PARSER_ENGLISH;
+            (*properties)[INVERTED_INDEX_PARSER_PHRASE_SUPPORT_KEY] =
+                    INVERTED_INDEX_PARSER_PHRASE_SUPPORT_YES;
+            (*properties)[INVERTED_INDEX_PARSER_LOWERCASE_KEY] = 
INVERTED_INDEX_PARSER_TRUE;
+
+            idx_meta.init_from_pb(*index_meta_pb);
+        }
+
+        std::string index_path_prefix;
+        prepare_string_index(rowset_id, seg_id, values, &idx_meta, 
&index_path_prefix);
+
+        auto file_reader = std::make_shared<InvertedIndexFileReader>(
+                io::global_local_filesystem(), index_path_prefix, 
InvertedIndexStorageFormatPB::V2);
+        ASSERT_TRUE(file_reader->init().ok());
+
+        auto mock_reader =
+                
MockTokenizedStringTypeInvertedIndexReader::create_shared(&idx_meta, 
file_reader);
+        ASSERT_NE(mock_reader, nullptr);
+
+        io::IOContext io_ctx;
+        OlapReaderStatistics stats;
+        RuntimeState runtime_state;
+        TQueryOptions opts;
+        opts.enable_inverted_index_searcher_cache = true;
+        runtime_state.set_query_options(opts);
+
+        std::shared_ptr<roaring::Roaring> bitmap = 
std::make_shared<roaring::Roaring>();
+        std::string field_name = "1"; // c2 unique_id
+
+        // Test tokenized query with "world" which should be found in "Hello 
world this is a test"
+        std::string query_term = "world";
+        StringRef query_val(query_term.data(), query_term.size());
+
+        Status st = mock_reader->query(&io_ctx, &stats, &runtime_state, 
field_name, &query_val,
+                                       
InvertedIndexQueryType::MATCH_ANY_QUERY, bitmap);
+
+        EXPECT_FALSE(st.ok());
+        EXPECT_EQ(st.code(), ErrorCode::INVERTED_INDEX_CLUCENE_ERROR);
+
+        // Test phrase query
+        std::string phrase_query = "Apache Doris";
+        StringRef phrase_query_val(phrase_query.data(), phrase_query.size());
+
+        st = mock_reader->query(&io_ctx, &stats, &runtime_state, field_name, 
&phrase_query_val,
+                                InvertedIndexQueryType::MATCH_PHRASE_QUERY, 
bitmap);
+
+        EXPECT_FALSE(st.ok());
+        EXPECT_EQ(st.code(), ErrorCode::INVERTED_INDEX_CLUCENE_ERROR);
+    }
+
 private:
     std::unique_ptr<InvertedIndexSearcherCache> _inverted_index_searcher_cache;
     std::unique_ptr<InvertedIndexQueryCache> _inverted_index_query_cache;
@@ -1893,4 +2050,14 @@ TEST_F(InvertedIndexReaderTest, CompatibleTest) {
     test_compatible_read_cross_platform();
 }
 
-} // namespace doris::segment_v2
\ No newline at end of file
+// Test cache error scenarios that could crash BE
+TEST_F(InvertedIndexReaderTest, CacheErrorScenarios) {
+    test_cache_error_scenarios();
+}
+
+// Test tokenized index query error scenarios
+TEST_F(InvertedIndexReaderTest, TokenizedIndexQueryErrorScenarios) {
+    test_tokenized_index_query_error_scenarios();
+}
+
+} // namespace doris::segment_v2


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to