This is an automated email from the ASF dual-hosted git repository.
airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 21aa3fca7fb [fix](inverted index) catch IO exception to avoid coredump
in inverted index string reader (#51844)
21aa3fca7fb is described below
commit 21aa3fca7fb8736ebb033b14cf46959f9cdefd8f
Author: airborne12 <[email protected]>
AuthorDate: Wed Jun 18 17:41:09 2025 +0800
[fix](inverted index) catch IO exception to avoid coredump in inverted
index string reader (#51844)
Problem Summary:
This PR adds error handling around CLucene interactions in the string
inverted index reader to prevent core dumps on IO failures and
introduces unit tests that cover these failure paths.
---
.../rowset/segment_v2/inverted_index_reader.cpp | 78 +++++-----
.../segment_v2/inverted_index_reader_test.cpp | 169 ++++++++++++++++++++-
2 files changed, 207 insertions(+), 40 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
index cb33c830c3d..ef2047a2bcd 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
@@ -423,34 +423,33 @@ Status StringTypeInvertedIndexReader::query(const
io::IOContext* io_ctx,
std::string search_str(search_query->data, act_len);
VLOG_DEBUG << "begin to query the inverted index from clucene"
<< ", column_name: " << column_name << ", search_str: " <<
search_str;
+ try {
+ auto index_file_key =
_inverted_index_file_reader->get_index_file_cache_key(&_index_meta);
+ // try to get query bitmap result from cache and return immediately on
cache hit
+ InvertedIndexQueryCache::CacheKey cache_key {index_file_key,
column_name, query_type,
+ search_str};
+ auto* cache = InvertedIndexQueryCache::instance();
+ InvertedIndexQueryCacheHandle cache_handler;
+ auto cache_status =
+ handle_query_cache(runtime_state, cache, cache_key,
&cache_handler, stats, bit_map);
+ if (cache_status.ok()) {
+ return Status::OK();
+ }
- auto index_file_key =
_inverted_index_file_reader->get_index_file_cache_key(&_index_meta);
- // try to get query bitmap result from cache and return immediately on
cache hit
- InvertedIndexQueryCache::CacheKey cache_key {index_file_key, column_name,
query_type,
- search_str};
- auto* cache = InvertedIndexQueryCache::instance();
- InvertedIndexQueryCacheHandle cache_handler;
- auto cache_status =
- handle_query_cache(runtime_state, cache, cache_key,
&cache_handler, stats, bit_map);
- if (cache_status.ok()) {
- return Status::OK();
- }
-
- std::wstring column_name_ws = StringUtil::string_to_wstring(column_name);
+ std::wstring column_name_ws =
StringUtil::string_to_wstring(column_name);
- InvertedIndexQueryInfo query_info;
- query_info.field_name = column_name_ws;
- query_info.terms.emplace_back(search_str);
+ InvertedIndexQueryInfo query_info;
+ query_info.field_name = column_name_ws;
+ query_info.terms.emplace_back(search_str);
- auto result = std::make_shared<roaring::Roaring>();
- FulltextIndexSearcherPtr* searcher_ptr = nullptr;
- InvertedIndexCacheHandle inverted_index_cache_handle;
- RETURN_IF_ERROR(
- handle_searcher_cache(runtime_state, &inverted_index_cache_handle,
io_ctx, stats));
- auto searcher_variant = inverted_index_cache_handle.get_index_searcher();
- searcher_ptr = std::get_if<FulltextIndexSearcherPtr>(&searcher_variant);
- if (searcher_ptr != nullptr) {
- try {
+ auto result = std::make_shared<roaring::Roaring>();
+ FulltextIndexSearcherPtr* searcher_ptr = nullptr;
+ InvertedIndexCacheHandle inverted_index_cache_handle;
+ RETURN_IF_ERROR(
+ handle_searcher_cache(runtime_state,
&inverted_index_cache_handle, io_ctx, stats));
+ auto searcher_variant =
inverted_index_cache_handle.get_index_searcher();
+ searcher_ptr =
std::get_if<FulltextIndexSearcherPtr>(&searcher_variant);
+ if (searcher_ptr != nullptr) {
switch (query_type) {
case InvertedIndexQueryType::MATCH_ANY_QUERY:
case InvertedIndexQueryType::MATCH_ALL_QUERY:
@@ -502,27 +501,28 @@ Status StringTypeInvertedIndexReader::query(const
io::IOContext* io_ctx,
return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
"invalid query type when query untokenized inverted
index");
}
- } catch (const CLuceneError& e) {
- if (is_range_query(query_type) && e.number() ==
CL_ERR_TooManyClauses) {
- return Status::Error<ErrorCode::INVERTED_INDEX_BYPASS>(
- "range query term exceeds limits, try to downgrade
from inverted index, "
- "column "
- "name:{}, search_str:{}",
- column_name, search_str);
- } else {
- return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
- "CLuceneError occured, error msg: {}, column name: {},
search_str: {}",
- e.what(), column_name, search_str);
- }
}
-
// add to cache
result->runOptimize();
cache->insert(cache_key, result, &cache_handler);
bit_map = result;
+ return Status::OK();
+ } catch (const CLuceneError& e) {
+ if (is_range_query(query_type) && e.number() == CL_ERR_TooManyClauses)
{
+ return Status::Error<ErrorCode::INVERTED_INDEX_BYPASS>(
+ "range query term exceeds limits, try to downgrade from
inverted index, "
+ "column "
+ "name:{}, search_str:{}",
+ column_name, search_str);
+ } else {
+ LOG(ERROR) << "CLuceneError occurred, error msg: " << e.what()
+ << ", column name: " << column_name << ", search_str: "
<< search_str;
+ return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
+ "CLuceneError occurred, error msg: {}, column name: {},
search_str: {}",
+ e.what(), column_name, search_str);
+ }
}
- return Status::OK();
}
InvertedIndexReaderType StringTypeInvertedIndexReader::type() {
diff --git a/be/test/olap/rowset/segment_v2/inverted_index_reader_test.cpp
b/be/test/olap/rowset/segment_v2/inverted_index_reader_test.cpp
index 19623d0dfc5..e52c72d3b57 100644
--- a/be/test/olap/rowset/segment_v2/inverted_index_reader_test.cpp
+++ b/be/test/olap/rowset/segment_v2/inverted_index_reader_test.cpp
@@ -1848,6 +1848,163 @@ public:
}
}
+ class MockStringTypeInvertedIndexReader final : public
StringTypeInvertedIndexReader {
+ public:
+ static std::shared_ptr<MockStringTypeInvertedIndexReader>
create_shared(
+ const TabletIndex* idx_meta,
+ std::shared_ptr<InvertedIndexFileReader>& file_reader) {
+ return std::shared_ptr<MockStringTypeInvertedIndexReader>(
+ new MockStringTypeInvertedIndexReader(idx_meta,
file_reader));
+ }
+
+ protected:
+ Status handle_searcher_cache(RuntimeState*, InvertedIndexCacheHandle*,
const io::IOContext*,
+ OlapReaderStatistics*) override {
+ CLuceneError err;
+ err.set(CL_ERR_IO, "mock handle_searcher_cache failure");
+ throw err;
+ }
+
+ private:
+ MockStringTypeInvertedIndexReader(const TabletIndex* idx_meta,
+
std::shared_ptr<InvertedIndexFileReader>& file_reader)
+ : StringTypeInvertedIndexReader(idx_meta, file_reader) {}
+ };
+
+ // Mock class for testing tokenized index query exceptions
+ class MockTokenizedStringTypeInvertedIndexReader final : public
FullTextIndexReader {
+ public:
+ static std::shared_ptr<MockTokenizedStringTypeInvertedIndexReader>
create_shared(
+ const TabletIndex* idx_meta,
+ std::shared_ptr<InvertedIndexFileReader>& file_reader) {
+ return std::shared_ptr<MockTokenizedStringTypeInvertedIndexReader>(
+ new MockTokenizedStringTypeInvertedIndexReader(idx_meta,
file_reader));
+ }
+
+ protected:
+ Status handle_searcher_cache(RuntimeState*, InvertedIndexCacheHandle*,
const io::IOContext*,
+ OlapReaderStatistics*) override {
+ CLuceneError err;
+ err.set(CL_ERR_IO, "mock tokenized index searcher cache failure");
+ throw err;
+ }
+
+ private:
+ MockTokenizedStringTypeInvertedIndexReader(
+ const TabletIndex* idx_meta,
std::shared_ptr<InvertedIndexFileReader>& file_reader)
+ : FullTextIndexReader(idx_meta, file_reader) {}
+ };
+
+ void test_cache_error_scenarios() {
+ std::string_view rowset_id = "test_handle_searcher_cache_exception";
+ int seg_id = 0;
+ std::vector<Slice> values = {Slice("apple"), Slice("banana")};
+
+ TabletIndex idx_meta;
+ {
+ auto index_meta_pb = std::make_unique<TabletIndexPB>();
+ index_meta_pb->set_index_type(IndexType::INVERTED);
+ index_meta_pb->set_index_id(1);
+ index_meta_pb->set_index_name("test_mock_cache");
+ index_meta_pb->add_col_unique_id(1); // c2
+ idx_meta.init_from_pb(*index_meta_pb);
+ }
+
+ std::string index_path_prefix;
+ prepare_string_index(rowset_id, seg_id, values, &idx_meta,
&index_path_prefix);
+
+ auto file_reader = std::make_shared<InvertedIndexFileReader>(
+ io::global_local_filesystem(), index_path_prefix,
InvertedIndexStorageFormatPB::V2);
+ ASSERT_TRUE(file_reader->init().ok());
+
+ auto mock_reader =
MockStringTypeInvertedIndexReader::create_shared(&idx_meta, file_reader);
+ ASSERT_NE(mock_reader, nullptr);
+
+ io::IOContext io_ctx;
+ OlapReaderStatistics stats;
+ RuntimeState runtime_state;
+ TQueryOptions opts;
+ opts.enable_inverted_index_searcher_cache = true;
+ runtime_state.set_query_options(opts);
+
+ std::shared_ptr<roaring::Roaring> bitmap =
std::make_shared<roaring::Roaring>();
+ std::string field_name = "1"; // c2 unique_id
+ StringRef query_val(values[0].data, values[0].size);
+
+ Status st = mock_reader->query(&io_ctx, &stats, &runtime_state,
field_name, &query_val,
+ InvertedIndexQueryType::EQUAL_QUERY,
bitmap);
+
+ EXPECT_FALSE(st.ok());
+ EXPECT_EQ(st.code(), ErrorCode::INVERTED_INDEX_CLUCENE_ERROR);
+ }
+
+ void test_tokenized_index_query_error_scenarios() {
+ std::string_view rowset_id = "test_tokenized_index_query_exception";
+ int seg_id = 0;
+ std::vector<Slice> values = {Slice("Hello world this is a test"),
+ Slice("Apache Doris is a modern analytics
database"),
+ Slice("Inverted index provides fast text
search")};
+
+ TabletIndex idx_meta;
+ {
+ auto index_meta_pb = std::make_unique<TabletIndexPB>();
+ index_meta_pb->set_index_type(IndexType::INVERTED);
+ index_meta_pb->set_index_id(2);
+ index_meta_pb->set_index_name("test_tokenized_mock_cache");
+ index_meta_pb->add_col_unique_id(1); // c2
+
+ // Set tokenized index properties
+ auto* properties = index_meta_pb->mutable_properties();
+ (*properties)[INVERTED_INDEX_PARSER_KEY] =
INVERTED_INDEX_PARSER_ENGLISH;
+ (*properties)[INVERTED_INDEX_PARSER_PHRASE_SUPPORT_KEY] =
+ INVERTED_INDEX_PARSER_PHRASE_SUPPORT_YES;
+ (*properties)[INVERTED_INDEX_PARSER_LOWERCASE_KEY] =
INVERTED_INDEX_PARSER_TRUE;
+
+ idx_meta.init_from_pb(*index_meta_pb);
+ }
+
+ std::string index_path_prefix;
+ prepare_string_index(rowset_id, seg_id, values, &idx_meta,
&index_path_prefix);
+
+ auto file_reader = std::make_shared<InvertedIndexFileReader>(
+ io::global_local_filesystem(), index_path_prefix,
InvertedIndexStorageFormatPB::V2);
+ ASSERT_TRUE(file_reader->init().ok());
+
+ auto mock_reader =
+
MockTokenizedStringTypeInvertedIndexReader::create_shared(&idx_meta,
file_reader);
+ ASSERT_NE(mock_reader, nullptr);
+
+ io::IOContext io_ctx;
+ OlapReaderStatistics stats;
+ RuntimeState runtime_state;
+ TQueryOptions opts;
+ opts.enable_inverted_index_searcher_cache = true;
+ runtime_state.set_query_options(opts);
+
+ std::shared_ptr<roaring::Roaring> bitmap =
std::make_shared<roaring::Roaring>();
+ std::string field_name = "1"; // c2 unique_id
+
+ // Test tokenized query with "world" which should be found in "Hello
world this is a test"
+ std::string query_term = "world";
+ StringRef query_val(query_term.data(), query_term.size());
+
+ Status st = mock_reader->query(&io_ctx, &stats, &runtime_state,
field_name, &query_val,
+
InvertedIndexQueryType::MATCH_ANY_QUERY, bitmap);
+
+ EXPECT_FALSE(st.ok());
+ EXPECT_EQ(st.code(), ErrorCode::INVERTED_INDEX_CLUCENE_ERROR);
+
+ // Test phrase query
+ std::string phrase_query = "Apache Doris";
+ StringRef phrase_query_val(phrase_query.data(), phrase_query.size());
+
+ st = mock_reader->query(&io_ctx, &stats, &runtime_state, field_name,
&phrase_query_val,
+ InvertedIndexQueryType::MATCH_PHRASE_QUERY,
bitmap);
+
+ EXPECT_FALSE(st.ok());
+ EXPECT_EQ(st.code(), ErrorCode::INVERTED_INDEX_CLUCENE_ERROR);
+ }
+
private:
std::unique_ptr<InvertedIndexSearcherCache> _inverted_index_searcher_cache;
std::unique_ptr<InvertedIndexQueryCache> _inverted_index_query_cache;
@@ -1893,4 +2050,14 @@ TEST_F(InvertedIndexReaderTest, CompatibleTest) {
test_compatible_read_cross_platform();
}
-} // namespace doris::segment_v2
\ No newline at end of file
+// Test cache error scenarios that could crash BE
+TEST_F(InvertedIndexReaderTest, CacheErrorScenarios) {
+ test_cache_error_scenarios();
+}
+
+// Test tokenized index query error scenarios
+TEST_F(InvertedIndexReaderTest, TokenizedIndexQueryErrorScenarios) {
+ test_tokenized_index_query_error_scenarios();
+}
+
+} // namespace doris::segment_v2
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]