This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new b53d9444782 [Improvement](inverted index) lazy init inverted index file reader (#38979)
b53d9444782 is described below

commit b53d9444782891946ab10329331e777c1673a9fe
Author: airborne12 <[email protected]>
AuthorDate: Wed Aug 7 18:50:48 2024 +0800

    [Improvement](inverted index) lazy init inverted index file reader (#38979)
    
    ## Proposed changes
    
    Currently InvertedIndexFileReader::init is called in segment_iterator
    init_inverted_index_iterators, which causes searcher cache misses.
    Instead, lazily initialize the file reader when a specific index is opened.
---
 .../rowset/segment_v2/inverted_index_file_reader.cpp    | 17 +++++++++++------
 .../olap/rowset/segment_v2/inverted_index_file_reader.h |  1 +
 .../rowset/segment_v2/inverted_index_file_writer.cpp    |  4 +++-
 be/src/olap/rowset/segment_v2/inverted_index_reader.cpp | 14 ++++++++++++++
 be/src/olap/rowset/segment_v2/segment.cpp               |  9 +--------
 5 files changed, 30 insertions(+), 15 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp 
b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp
index dbd86bb93a5..09a6a62aaa6 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp
@@ -28,13 +28,18 @@
 namespace doris::segment_v2 {
 
 Status InvertedIndexFileReader::init(int32_t read_buffer_size, bool 
open_idx_file_cache) {
-    _read_buffer_size = read_buffer_size;
-    _open_idx_file_cache = open_idx_file_cache;
-    if (_storage_format == InvertedIndexStorageFormatPB::V2) {
-        return _init_from_v2(read_buffer_size);
-    } else {
-        return Status::OK();
+    if (!_inited) {
+        _read_buffer_size = read_buffer_size;
+        _open_idx_file_cache = open_idx_file_cache;
+        if (_storage_format == InvertedIndexStorageFormatPB::V2) {
+            auto st = _init_from_v2(read_buffer_size);
+            if (!st.ok()) {
+                return st;
+            }
+        }
+        _inited = true;
     }
+    return Status::OK();
 }
 
 Status InvertedIndexFileReader::_init_from_v2(int32_t read_buffer_size) {
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.h 
b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.h
index fc0dd6e43d1..1414f493e4b 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.h
@@ -79,6 +79,7 @@ private:
     bool _open_idx_file_cache = false;
     InvertedIndexStorageFormatPB _storage_format;
     mutable std::shared_mutex _mutex; // Use mutable for const read operations
+    bool _inited = false;
 };
 
 } // namespace segment_v2
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp 
b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp
index 3dd842b16d8..f2ac0e92265 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp
@@ -115,7 +115,9 @@ Status InvertedIndexFileWriter::close() {
     }
     DBUG_EXECUTE_IF("inverted_index_storage_format_must_be_v2", {
         if (_storage_format != InvertedIndexStorageFormatPB::V2) {
-            _CLTHROWA(CL_ERR_IO, "inverted index storage format must be v2");
+            return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
+                    "InvertedIndexFileWriter::close fault injection:inverted 
index storage format "
+                    "must be v2");
         }
     })
     if (_storage_format == InvertedIndexStorageFormatPB::V1) {
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp 
b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
index 02339c00080..72e373425ef 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
@@ -205,6 +205,13 @@ Status 
InvertedIndexReader::read_null_bitmap(OlapReaderStatistics* stats,
 
         if (!dir) {
             // TODO: ugly code here, try to refact.
+            bool open_idx_file_cache = true;
+            auto st = 
_inverted_index_file_reader->init(config::inverted_index_read_buffer_size,
+                                                        open_idx_file_cache);
+            if (!st.ok()) {
+                LOG(WARNING) << st;
+                return st;
+            }
             auto directory = 
DORIS_TRY(_inverted_index_file_reader->open(&_index_meta));
             dir = directory.release();
             owned_dir = true;
@@ -255,6 +262,13 @@ Status InvertedIndexReader::handle_searcher_cache(
         SCOPED_RAW_TIMER(&stats->inverted_index_searcher_open_timer);
         IndexSearcherPtr searcher;
 
+        bool open_idx_file_cache = true;
+        auto st = 
_inverted_index_file_reader->init(config::inverted_index_read_buffer_size,
+                                                    open_idx_file_cache);
+        if (!st.ok()) {
+            LOG(WARNING) << st;
+            return st;
+        }
         auto dir = DORIS_TRY(_inverted_index_file_reader->open(&_index_meta));
         // try to reuse index_searcher's directory to read null_bitmap to cache
         // to avoid open directory additionally for null_bitmap
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp 
b/be/src/olap/rowset/segment_v2/segment.cpp
index 7f353cc0add..0208ed635e1 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -185,14 +185,7 @@ Status Segment::_open_inverted_index() {
             std::string {InvertedIndexDescriptor::get_index_file_path_prefix(
                     _file_reader->path().native())},
             _tablet_schema->get_inverted_index_storage_format());
-    bool open_idx_file_cache = true;
-    auto st = 
_inverted_index_file_reader->init(config::inverted_index_read_buffer_size,
-                                                open_idx_file_cache);
-    if (st.is<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>()) {
-        LOG(INFO) << st;
-        return Status::OK();
-    }
-    return st;
+    return Status::OK();
 }
 
 Status Segment::new_iterator(SchemaSPtr schema, const StorageReadOptions& 
read_options,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to