This is an automated email from the ASF dual-hosted git repository.
airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new b53d9444782 [Improvement](inverted index) lazy init inverted index
file reader (#38979)
b53d9444782 is described below
commit b53d9444782891946ab10329331e777c1673a9fe
Author: airborne12 <[email protected]>
AuthorDate: Wed Aug 7 18:50:48 2024 +0800
[Improvement](inverted index) lazy init inverted index file reader (#38979)
## Proposed changes
We currently call InvertedIndexFileReader::init in segment_iterator's
init_inverted_index_iterators, which causes searcher cache misses.
So we now initialize the file reader lazily, when a specific index is opened.
---
.../rowset/segment_v2/inverted_index_file_reader.cpp | 17 +++++++++++------
.../olap/rowset/segment_v2/inverted_index_file_reader.h | 1 +
.../rowset/segment_v2/inverted_index_file_writer.cpp | 4 +++-
be/src/olap/rowset/segment_v2/inverted_index_reader.cpp | 14 ++++++++++++++
be/src/olap/rowset/segment_v2/segment.cpp | 9 +--------
5 files changed, 30 insertions(+), 15 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp
b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp
index dbd86bb93a5..09a6a62aaa6 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp
@@ -28,13 +28,18 @@
namespace doris::segment_v2 {
Status InvertedIndexFileReader::init(int32_t read_buffer_size, bool
open_idx_file_cache) {
- _read_buffer_size = read_buffer_size;
- _open_idx_file_cache = open_idx_file_cache;
- if (_storage_format == InvertedIndexStorageFormatPB::V2) {
- return _init_from_v2(read_buffer_size);
- } else {
- return Status::OK();
+ if (!_inited) {
+ _read_buffer_size = read_buffer_size;
+ _open_idx_file_cache = open_idx_file_cache;
+ if (_storage_format == InvertedIndexStorageFormatPB::V2) {
+ auto st = _init_from_v2(read_buffer_size);
+ if (!st.ok()) {
+ return st;
+ }
+ }
+ _inited = true;
}
+ return Status::OK();
}
Status InvertedIndexFileReader::_init_from_v2(int32_t read_buffer_size) {
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.h
b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.h
index fc0dd6e43d1..1414f493e4b 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.h
@@ -79,6 +79,7 @@ private:
bool _open_idx_file_cache = false;
InvertedIndexStorageFormatPB _storage_format;
mutable std::shared_mutex _mutex; // Use mutable for const read operations
+ bool _inited = false;
};
} // namespace segment_v2
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp
b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp
index 3dd842b16d8..f2ac0e92265 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp
@@ -115,7 +115,9 @@ Status InvertedIndexFileWriter::close() {
}
DBUG_EXECUTE_IF("inverted_index_storage_format_must_be_v2", {
if (_storage_format != InvertedIndexStorageFormatPB::V2) {
- _CLTHROWA(CL_ERR_IO, "inverted index storage format must be v2");
+ return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
+ "InvertedIndexFileWriter::close fault injection:inverted
index storage format "
+ "must be v2");
}
})
if (_storage_format == InvertedIndexStorageFormatPB::V1) {
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
index 02339c00080..72e373425ef 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
@@ -205,6 +205,13 @@ Status
InvertedIndexReader::read_null_bitmap(OlapReaderStatistics* stats,
if (!dir) {
// TODO: ugly code here, try to refact.
+ bool open_idx_file_cache = true;
+ auto st =
_inverted_index_file_reader->init(config::inverted_index_read_buffer_size,
+ open_idx_file_cache);
+ if (!st.ok()) {
+ LOG(WARNING) << st;
+ return st;
+ }
auto directory =
DORIS_TRY(_inverted_index_file_reader->open(&_index_meta));
dir = directory.release();
owned_dir = true;
@@ -255,6 +262,13 @@ Status InvertedIndexReader::handle_searcher_cache(
SCOPED_RAW_TIMER(&stats->inverted_index_searcher_open_timer);
IndexSearcherPtr searcher;
+ bool open_idx_file_cache = true;
+ auto st =
_inverted_index_file_reader->init(config::inverted_index_read_buffer_size,
+ open_idx_file_cache);
+ if (!st.ok()) {
+ LOG(WARNING) << st;
+ return st;
+ }
auto dir = DORIS_TRY(_inverted_index_file_reader->open(&_index_meta));
// try to reuse index_searcher's directory to read null_bitmap to cache
// to avoid open directory additionally for null_bitmap
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp
b/be/src/olap/rowset/segment_v2/segment.cpp
index 7f353cc0add..0208ed635e1 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -185,14 +185,7 @@ Status Segment::_open_inverted_index() {
std::string {InvertedIndexDescriptor::get_index_file_path_prefix(
_file_reader->path().native())},
_tablet_schema->get_inverted_index_storage_format());
- bool open_idx_file_cache = true;
- auto st =
_inverted_index_file_reader->init(config::inverted_index_read_buffer_size,
- open_idx_file_cache);
- if (st.is<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>()) {
- LOG(INFO) << st;
- return Status::OK();
- }
- return st;
+ return Status::OK();
}
Status Segment::new_iterator(SchemaSPtr schema, const StorageReadOptions&
read_options,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]