This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 64fef996197 branch-3.1: [feature](inverted index) add index searcher
after writing inverted index #43992 (#52877)
64fef996197 is described below
commit 64fef996197be78e408f1b69d8bcd10a89d083fd
Author: airborne12 <[email protected]>
AuthorDate: Tue Jul 8 10:32:24 2025 +0800
branch-3.1: [feature](inverted index) add index searcher after writing
inverted index #43992 (#52877)
cherry pick from #43992
---
be/src/common/config.cpp | 3 +-
be/src/common/config.h | 2 +-
.../segment_v2/inverted_index_file_reader.cpp | 1 +
.../rowset/segment_v2/inverted_index_file_reader.h | 4 +-
.../segment_v2/inverted_index_file_writer.cpp | 61 ++++
.../rowset/segment_v2/inverted_index_file_writer.h | 5 +
.../rowset/segment_v2/inverted_index_reader.cpp | 15 +-
.../olap/rowset/segment_v2/inverted_index_reader.h | 5 +-
.../segment_v2/inverted_index_file_writer_test.cpp | 374 ++++++++++++++++++++-
regression-test/pipeline/p0/conf/be.conf | 1 +
10 files changed, 448 insertions(+), 23 deletions(-)
diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index d472872cb7b..e521597bc27 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1142,10 +1142,9 @@ DEFINE_Int32(file_cache_downloader_thread_num_max, "32");
DEFINE_mInt32(index_cache_entry_stay_time_after_lookup_s, "1800");
DEFINE_mInt32(inverted_index_cache_stale_sweep_time_sec, "600");
+DEFINE_mBool(enable_write_index_searcher_cache, "false");
// inverted index searcher cache size
DEFINE_String(inverted_index_searcher_cache_limit, "10%");
-// set `true` to enable insert searcher into cache when write inverted index data
-DEFINE_Bool(enable_write_index_searcher_cache, "true");
DEFINE_Bool(enable_inverted_index_cache_check_timestamp, "true");
DEFINE_mBool(enable_inverted_index_correct_term_write, "true");
DEFINE_Int32(inverted_index_fd_number_limit_percent, "20"); // 20%
diff --git a/be/src/common/config.h b/be/src/common/config.h
index ec56c14b191..9844a832500 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1187,7 +1187,7 @@ DECLARE_mInt32(inverted_index_cache_stale_sweep_time_sec);
// inverted index searcher cache size
DECLARE_String(inverted_index_searcher_cache_limit);
// set `true` to enable insert searcher into cache when write inverted index data
-DECLARE_Bool(enable_write_index_searcher_cache);
+DECLARE_mBool(enable_write_index_searcher_cache);
DECLARE_Bool(enable_inverted_index_cache_check_timestamp);
DECLARE_mBool(enable_inverted_index_correct_term_write);
DECLARE_Int32(inverted_index_fd_number_limit_percent); // 50%
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp
index fc08e4e8a41..8dbac071290 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp
@@ -228,6 +228,7 @@ Result<std::unique_ptr<DorisCompoundReader>> InvertedIndexFileReader::_open(
}
return compound_reader;
}
+
Result<std::unique_ptr<DorisCompoundReader>> InvertedIndexFileReader::open(
const TabletIndex* index_meta, const io::IOContext* io_ctx) const {
auto index_id = index_meta->index_id();
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.h b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.h
index c4264f9b462..63dd89cf975 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.h
@@ -70,13 +70,15 @@ public:
Result<InvertedIndexDirectoryMap> get_all_directories();
// open file v2, init _stream
int64_t get_inverted_file_size() const { return _stream == nullptr ? 0 :
_stream->length(); }
+ friend InvertedIndexFileWriter;
-private:
+protected:
Status _init_from(int32_t read_buffer_size, const io::IOContext* io_ctx);
Result<std::unique_ptr<DorisCompoundReader>> _open(int64_t index_id,
const std::string&
index_suffix,
const io::IOContext*
io_ctx = nullptr) const;
+private:
IndicesEntriesMap _indices_entries;
std::unique_ptr<CL_NS(store)::IndexInput> _stream = nullptr;
const io::FileSystemSPtr _fs;
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp
index 4595d0e27c7..56e476a1e0c 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp
@@ -25,7 +25,9 @@
#include "common/exception.h"
#include "common/status.h"
#include "io/fs/stream_sink_file_writer.h"
+#include "olap/rowset/segment_v2/inverted_index_compound_reader.h"
#include "olap/rowset/segment_v2/inverted_index_desc.h"
+#include "olap/rowset/segment_v2/inverted_index_file_reader.h"
#include "olap/rowset/segment_v2/inverted_index_fs_directory.h"
#include "olap/rowset/segment_v2/inverted_index_reader.h"
#include "olap/tablet_schema.h"
@@ -122,6 +124,62 @@ int64_t InvertedIndexFileWriter::headerLength() {
return header_size;
}
+Status InvertedIndexFileWriter::add_into_searcher_cache() {
+ auto inverted_index_file_reader =
+ std::make_unique<InvertedIndexFileReader>(_fs, _index_path_prefix,
_storage_format);
+ auto st = inverted_index_file_reader->init();
+ if (!st.ok()) {
+ if (dynamic_cast<io::StreamSinkFileWriter*>(_idx_v2_writer.get()) !=
nullptr) {
+ //StreamSinkFileWriter not found file is normal.
+ return Status::OK();
+ }
+ LOG(WARNING) << "InvertedIndexFileWriter::add_into_searcher_cache for "
+ << _index_path_prefix << ", error " << st.msg();
+ return st;
+ }
+ for (const auto& entry : _indices_dirs) {
+ auto index_meta = entry.first;
+ auto dir =
+ DORIS_TRY(inverted_index_file_reader->_open(index_meta.first,
index_meta.second));
+ auto index_file_key =
InvertedIndexDescriptor::get_index_file_cache_key(
+ _index_path_prefix, index_meta.first, index_meta.second);
+ InvertedIndexSearcherCache::CacheKey
searcher_cache_key(index_file_key);
+ InvertedIndexCacheHandle inverted_index_cache_handle;
+ if (InvertedIndexSearcherCache::instance()->lookup(searcher_cache_key,
+
&inverted_index_cache_handle)) {
+ auto st = InvertedIndexSearcherCache::instance()->erase(
+ searcher_cache_key.index_file_path);
+ if (!st.ok()) {
+ LOG(WARNING) <<
"InvertedIndexFileWriter::add_into_searcher_cache for "
+ << _index_path_prefix << ", error " << st.msg();
+ }
+ }
+ IndexSearcherPtr searcher;
+ size_t reader_size = 0;
+ auto index_searcher_builder =
DORIS_TRY(_construct_index_searcher_builder(dir.get()));
+ RETURN_IF_ERROR(InvertedIndexReader::create_index_searcher(
+ index_searcher_builder.get(), dir.release(), &searcher,
reader_size));
+ auto* cache_value = new
InvertedIndexSearcherCache::CacheValue(std::move(searcher),
+
reader_size, UnixMillis());
+ InvertedIndexSearcherCache::instance()->insert(searcher_cache_key,
cache_value);
+ }
+ return Status::OK();
+}
+
+Result<std::unique_ptr<IndexSearcherBuilder>>
+InvertedIndexFileWriter::_construct_index_searcher_builder(const
DorisCompoundReader* dir) {
+ std::vector<std::string> files;
+ dir->list(&files);
+ auto reader_type = InvertedIndexReaderType::FULLTEXT;
+ bool found_bkd = std::any_of(files.begin(), files.end(), [](const
std::string& file) {
+ return file ==
InvertedIndexDescriptor::get_temporary_bkd_index_data_file_name();
+ });
+ if (found_bkd) {
+ reader_type = InvertedIndexReaderType::BKD;
+ }
+ return IndexSearcherBuilder::create_index_searcher_builder(reader_type);
+}
+
Status InvertedIndexFileWriter::close() {
DCHECK(!_closed) << debug_string();
_closed = true;
@@ -172,6 +230,9 @@ Status InvertedIndexFileWriter::close() {
err.what());
}
}
+ if (config::enable_write_index_searcher_cache) {
+ return add_into_searcher_cache();
+ }
return Status::OK();
}
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.h b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.h
index ab7cdbff152..c405d77196e 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.h
@@ -30,7 +30,9 @@
#include "io/fs/file_writer.h"
#include "io/fs/local_file_system.h"
#include "olap/rowset/segment_v2/inverted_index_common.h"
+#include "olap/rowset/segment_v2/inverted_index_compound_reader.h"
#include "olap/rowset/segment_v2/inverted_index_desc.h"
+#include "olap/rowset/segment_v2/inverted_index_searcher.h"
#include "runtime/exec_env.h"
namespace doris {
@@ -71,6 +73,7 @@ public:
Status delete_index(const TabletIndex* index_meta);
Status initialize(InvertedIndexDirectoryMap& indices_dirs);
virtual ~InvertedIndexFileWriter() = default;
+ Status add_into_searcher_cache();
Status write();
Status write_v1();
Status close();
@@ -148,6 +151,8 @@ private:
const std::vector<FileMetadata>& file_metadata);
Status _insert_directory_into_map(int64_t index_id, const std::string&
index_suffix,
std::shared_ptr<DorisFSDirectory> dir);
+ virtual Result<std::unique_ptr<IndexSearcherBuilder>>
_construct_index_searcher_builder(
+ const DorisCompoundReader* dir);
// Member variables...
InvertedIndexDirectoryMap _indices_dirs;
const io::FileSystemSPtr _fs;
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
index bb6dee6d9a7..6398b057996 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
@@ -173,7 +173,10 @@ Status InvertedIndexReader::handle_searcher_cache(
InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
static_cast<void>(read_null_bitmap(io_ctx, stats,
&null_bitmap_cache_handle, dir.get()));
size_t reader_size = 0;
- RETURN_IF_ERROR(create_index_searcher(dir.release(), &searcher,
type(), reader_size));
+ auto index_searcher_builder =
+
DORIS_TRY(IndexSearcherBuilder::create_index_searcher_builder(type()));
+ RETURN_IF_ERROR(create_index_searcher(index_searcher_builder.get(),
dir.release(),
+ &searcher, reader_size));
auto* cache_value = new
InvertedIndexSearcherCache::CacheValue(std::move(searcher),
reader_size, UnixMillis());
InvertedIndexSearcherCache::instance()->insert(searcher_cache_key,
cache_value,
@@ -182,13 +185,9 @@ Status InvertedIndexReader::handle_searcher_cache(
}
}
-Status InvertedIndexReader::create_index_searcher(lucene::store::Directory*
dir,
- IndexSearcherPtr* searcher,
- InvertedIndexReaderType
reader_type,
- size_t& reader_size) {
- auto index_searcher_builder =
-
DORIS_TRY(IndexSearcherBuilder::create_index_searcher_builder(reader_type));
-
+Status InvertedIndexReader::create_index_searcher(IndexSearcherBuilder*
index_searcher_builder,
+ lucene::store::Directory*
dir,
+ IndexSearcherPtr* searcher,
size_t& reader_size) {
auto searcher_result =
DORIS_TRY(index_searcher_builder->get_index_searcher(dir));
*searcher = searcher_result;
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.h b/be/src/olap/rowset/segment_v2/inverted_index_reader.h
index 2a5fc72a78d..96ba52adab2 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.h
@@ -226,8 +226,9 @@ public:
virtual Status handle_searcher_cache(InvertedIndexCacheHandle*
inverted_index_cache_handle,
const io::IOContext* io_ctx,
OlapReaderStatistics* stats);
std::string get_index_file_path();
- static Status create_index_searcher(lucene::store::Directory* dir,
IndexSearcherPtr* searcher,
- InvertedIndexReaderType reader_type,
size_t& reader_size);
+ static Status create_index_searcher(IndexSearcherBuilder*
index_searcher_builder,
+ lucene::store::Directory* dir,
IndexSearcherPtr* searcher,
+ size_t& reader_size);
protected:
Status match_index_search(const io::IOContext* io_ctx,
OlapReaderStatistics* stats,
diff --git a/be/test/olap/rowset/segment_v2/inverted_index_file_writer_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index_file_writer_test.cpp
index 2aead76daee..18daaa42f93 100644
--- a/be/test/olap/rowset/segment_v2/inverted_index_file_writer_test.cpp
+++ b/be/test/olap/rowset/segment_v2/inverted_index_file_writer_test.cpp
@@ -22,12 +22,10 @@
#include <gtest/gtest-test-part.h>
#include <gtest/gtest.h>
-#include "olap/rowset/rowset_factory.h"
#include "olap/rowset/segment_v2/inverted_index_fs_directory.h"
#include "olap/storage_engine.h"
-namespace doris {
-namespace segment_v2 {
+namespace doris::segment_v2 {
using namespace doris::vectorized;
@@ -64,10 +62,9 @@ protected:
ExecEnv::GetInstance()->set_tmp_file_dir(std::move(tmp_file_dirs));
// use memory limit
- int64_t inverted_index_cache_limit = 0;
+ int64_t inverted_index_cache_limit = 1024 * 1024 * 1024;
_inverted_index_searcher_cache =
std::unique_ptr<segment_v2::InvertedIndexSearcherCache>(
-
InvertedIndexSearcherCache::create_global_instance(inverted_index_cache_limit,
- 256));
+
InvertedIndexSearcherCache::create_global_instance(inverted_index_cache_limit,
1));
ExecEnv::GetInstance()->set_inverted_index_searcher_cache(
_inverted_index_searcher_cache.get());
@@ -194,7 +191,9 @@ TEST_F(InvertedIndexFileWriterTest, WriteV1Test) {
dir->close();
Status close_status = writer.close();
- if (!close_status.ok()) std::cout << "close error:" << close_status.msg()
<< std::endl;
+ if (!close_status.ok()) {
+ std::cout << "close error:" << close_status.msg() << std::endl;
+ }
ASSERT_TRUE(close_status.ok());
const InvertedIndexFileInfo* file_info = writer.get_index_file_info();
@@ -657,5 +656,362 @@ TEST_F(InvertedIndexFileWriterTest, CopyFileEmptyFileTest) {
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(local_fs_index_path).ok());
}
-} // namespace segment_v2
-} // namespace doris
\ No newline at end of file
+class MockInvertedIndexFileWriter : public InvertedIndexFileWriter {
+public:
+ MOCK_METHOD(Result<std::unique_ptr<IndexSearcherBuilder>>,
_construct_index_searcher_builder,
+ (const DorisCompoundReader* dir), (override));
+ MockInvertedIndexFileWriter(io::FileSystemSPtr fs, const std::string&
index_path_prefix,
+ const std::string& rowset_id, int64_t seg_id,
+ InvertedIndexStorageFormatPB storage_format,
+ io::FileWriterPtr file_writer)
+ : InvertedIndexFileWriter(fs, index_path_prefix, rowset_id,
seg_id, storage_format,
+ std::move(file_writer)) {}
+};
+
+class MockIndexSearcherBuilder : public IndexSearcherBuilder {
+public:
+ MOCK_METHOD(Result<IndexSearcherPtr>, get_index_searcher,
+ (lucene::store::Directory * directory), (override));
+ MOCK_METHOD(Status, build,
+ (lucene::store::Directory * directory,
OptionalIndexSearcherPtr& output_searcher),
+ (override));
+ MockIndexSearcherBuilder() = default;
+ ~MockIndexSearcherBuilder() override = default;
+};
+
+TEST_F(InvertedIndexFileWriterTest, AddIntoSearcherCacheTest) {
+ config::enable_write_index_searcher_cache = true;
+ io::FileWriterPtr file_writer;
+ std::string index_path =
InvertedIndexDescriptor::get_index_file_path_v2(_index_path_prefix);
+ io::FileWriterOptions opts;
+ Status st = _fs->create_file(index_path, &file_writer, &opts);
+ ASSERT_TRUE(st.ok());
+ MockInvertedIndexFileWriter writer(_fs, _index_path_prefix, _rowset_id,
_seg_id,
+ InvertedIndexStorageFormatPB::V2,
std::move(file_writer));
+
+ int64_t index_id = 1;
+ std::string index_suffix = "suffix1";
+ auto index_meta = create_mock_tablet_index(index_id, index_suffix);
+ ASSERT_NE(index_meta, nullptr);
+ auto open_result = writer.open(index_meta.get());
+ ASSERT_TRUE(open_result.has_value());
+ auto dir = open_result.value();
+ auto out_file = std::unique_ptr<lucene::store::IndexOutput>(
+ dir->createOutput("add_into_searcher_cache_test"));
+ out_file->writeString("test");
+ out_file->close();
+ dir->close();
+ auto mock_builder = std::make_unique<MockIndexSearcherBuilder>();
+
+ EXPECT_CALL(*mock_builder, get_index_searcher(testing::_))
+ .WillOnce(testing::Invoke(
+ [](lucene::store::Directory* directory) ->
Result<IndexSearcherPtr> {
+ auto close_directory = true;
+ auto bkd_reader =
std::make_shared<lucene::util::bkd::bkd_reader>(
+ directory, close_directory);
+ _CLDECDELETE(directory)
+ return bkd_reader;
+ }));
+
+ EXPECT_CALL(writer, _construct_index_searcher_builder(testing::_))
+
.WillOnce(testing::Return(testing::ByMove(std::move(mock_builder))));
+
+ Status close_status = writer.close();
+ ASSERT_TRUE(close_status.ok());
+
+ auto index_file_key =
InvertedIndexDescriptor::get_index_file_cache_key(_index_path_prefix,
+
index_id, index_suffix);
+ InvertedIndexSearcherCache::CacheKey searcher_cache_key(index_file_key);
+ InvertedIndexCacheHandle cache_handle;
+ bool found = _inverted_index_searcher_cache->lookup(searcher_cache_key,
&cache_handle);
+ ASSERT_TRUE(found);
+ auto* cache_value_use_cache = cache_handle.get_index_cache_value();
+ EXPECT_GE(UnixMillis(), cache_value_use_cache->last_visit_time);
+ auto searcher_variant = cache_value_use_cache->index_searcher;
+ EXPECT_TRUE(std::holds_alternative<BKDIndexSearcherPtr>(searcher_variant));
+ config::enable_write_index_searcher_cache = false;
+}
+
+TEST_F(InvertedIndexFileWriterTest, CacheEvictionTest) {
+ config::enable_write_index_searcher_cache = true;
+ io::FileWriterPtr file_writer1, file_writer2, file_writer3;
+ std::string index_path1 =
+ InvertedIndexDescriptor::get_index_file_path_v2(_index_path_prefix
+ "_1");
+ std::string index_path2 =
+ InvertedIndexDescriptor::get_index_file_path_v2(_index_path_prefix
+ "_2");
+ std::string index_path3 =
+ InvertedIndexDescriptor::get_index_file_path_v2(_index_path_prefix
+ "_3");
+ io::FileWriterOptions opts;
+ Status st1 = _fs->create_file(index_path1, &file_writer1, &opts);
+ Status st2 = _fs->create_file(index_path2, &file_writer2, &opts);
+ Status st3 = _fs->create_file(index_path3, &file_writer3, &opts);
+ ASSERT_TRUE(st1.ok());
+ ASSERT_TRUE(st2.ok());
+ ASSERT_TRUE(st3.ok());
+ MockInvertedIndexFileWriter writer1(_fs, _index_path_prefix + "_1",
"rowset1", 1,
+ InvertedIndexStorageFormatPB::V2,
std::move(file_writer1));
+ int64_t index_id1 = 1;
+ std::string index_suffix1 = "suffix1";
+ auto index_meta1 = create_mock_tablet_index(index_id1, index_suffix1);
+ ASSERT_NE(index_meta1, nullptr);
+ auto open_result1 = writer1.open(index_meta1.get());
+ ASSERT_TRUE(open_result1.has_value());
+ auto dir1 = open_result1.value();
+ auto out_file1 =
+
std::unique_ptr<lucene::store::IndexOutput>(dir1->createOutput("cache_eviction_test1"));
+ out_file1->writeString("test1");
+ out_file1->close();
+ dir1->close();
+ auto mock_builder1 = std::make_unique<MockIndexSearcherBuilder>();
+
+ EXPECT_CALL(*mock_builder1, get_index_searcher(testing::_))
+ .WillOnce(testing::Invoke(
+ [](lucene::store::Directory* directory) ->
Result<IndexSearcherPtr> {
+ auto close_directory = true;
+ auto bkd_reader =
std::make_shared<lucene::util::bkd::bkd_reader>(
+ directory, close_directory);
+ _CLDECDELETE(directory)
+ return bkd_reader;
+ }));
+
+ EXPECT_CALL(writer1, _construct_index_searcher_builder(testing::_))
+
.WillOnce(testing::Return(testing::ByMove(std::move(mock_builder1))));
+
+ Status close_status1 = writer1.close();
+ ASSERT_TRUE(close_status1.ok());
+
+ MockInvertedIndexFileWriter writer2(_fs, _index_path_prefix + "_2",
"rowset2", 2,
+ InvertedIndexStorageFormatPB::V2,
std::move(file_writer2));
+ int64_t index_id2 = 2;
+ std::string index_suffix2 = "suffix2";
+ auto index_meta2 = create_mock_tablet_index(index_id2, index_suffix2);
+ ASSERT_NE(index_meta2, nullptr);
+ auto open_result2 = writer2.open(index_meta2.get());
+ ASSERT_TRUE(open_result2.has_value());
+ auto dir2 = open_result2.value();
+ auto out_file2 =
+
std::unique_ptr<lucene::store::IndexOutput>(dir2->createOutput("cache_eviction_test2"));
+ out_file2->writeString("test2");
+ out_file2->close();
+ dir2->close();
+ auto mock_builder2 = std::make_unique<MockIndexSearcherBuilder>();
+
+ EXPECT_CALL(*mock_builder2, get_index_searcher(testing::_))
+ .WillOnce(testing::Invoke(
+ [](lucene::store::Directory* directory) ->
Result<IndexSearcherPtr> {
+ auto close_directory = true;
+ auto bkd_reader =
std::make_shared<lucene::util::bkd::bkd_reader>(
+ directory, close_directory);
+ _CLDECDELETE(directory)
+ return bkd_reader;
+ }));
+
+ EXPECT_CALL(writer2, _construct_index_searcher_builder(testing::_))
+
.WillOnce(testing::Return(testing::ByMove(std::move(mock_builder2))));
+
+ Status close_status2 = writer2.close();
+ ASSERT_TRUE(close_status2.ok());
+
+ MockInvertedIndexFileWriter writer3(_fs, _index_path_prefix + "_3",
"rowset3", 3,
+ InvertedIndexStorageFormatPB::V2,
std::move(file_writer3));
+ int64_t index_id3 = 3;
+ std::string index_suffix3 = "suffix3";
+ auto index_meta3 = create_mock_tablet_index(index_id3, index_suffix3);
+ ASSERT_NE(index_meta3, nullptr);
+ auto open_result3 = writer3.open(index_meta3.get());
+ ASSERT_TRUE(open_result3.has_value());
+ auto dir3 = open_result3.value();
+ auto out_file3 =
+
std::unique_ptr<lucene::store::IndexOutput>(dir3->createOutput("cache_eviction_test3"));
+ out_file3->writeString("test3");
+ out_file3->close();
+ dir3->close();
+ auto mock_builder3 = std::make_unique<MockIndexSearcherBuilder>();
+
+ EXPECT_CALL(*mock_builder3, get_index_searcher(testing::_))
+ .WillOnce(testing::Invoke(
+ [](lucene::store::Directory* directory) ->
Result<IndexSearcherPtr> {
+ auto close_directory = true;
+ auto bkd_reader =
std::make_shared<lucene::util::bkd::bkd_reader>(
+ directory, close_directory);
+ _CLDECDELETE(directory)
+ return bkd_reader;
+ }));
+ EXPECT_CALL(writer3, _construct_index_searcher_builder(testing::_))
+
.WillOnce(testing::Return(testing::ByMove(std::move(mock_builder3))));
+
+ Status close_status3 = writer3.close();
+ ASSERT_TRUE(close_status3.ok());
+
+ InvertedIndexCacheHandle cache_handle1;
+ std::string index_file_key1 =
InvertedIndexDescriptor::get_index_file_cache_key(
+ _index_path_prefix + "_1", index_id1, index_suffix1);
+ bool found1 = _inverted_index_searcher_cache->lookup(index_file_key1,
&cache_handle1);
+ // limit to 2 in BE_TEST, so the first one should be evicted
+ ASSERT_FALSE(found1);
+
+ InvertedIndexCacheHandle cache_handle2;
+ std::string index_file_key2 =
InvertedIndexDescriptor::get_index_file_cache_key(
+ _index_path_prefix + "_2", index_id2, index_suffix2);
+ bool found2 = _inverted_index_searcher_cache->lookup(index_file_key2,
&cache_handle2);
+ ASSERT_TRUE(found2);
+ auto* cache_value_use_cache2 = cache_handle2.get_index_cache_value();
+ EXPECT_GE(UnixMillis(), cache_value_use_cache2->last_visit_time);
+ auto searcher_variant2 = cache_value_use_cache2->index_searcher;
+
EXPECT_TRUE(std::holds_alternative<BKDIndexSearcherPtr>(searcher_variant2));
+
+ InvertedIndexCacheHandle cache_handle3;
+ std::string index_file_key3 =
InvertedIndexDescriptor::get_index_file_cache_key(
+ _index_path_prefix + "_3", index_id3, index_suffix3);
+ bool found3 = _inverted_index_searcher_cache->lookup(index_file_key3,
&cache_handle3);
+ ASSERT_TRUE(found3);
+ auto* cache_value_use_cache3 = cache_handle3.get_index_cache_value();
+ EXPECT_GE(UnixMillis(), cache_value_use_cache3->last_visit_time);
+ auto searcher_variant3 = cache_value_use_cache3->index_searcher;
+
EXPECT_TRUE(std::holds_alternative<BKDIndexSearcherPtr>(searcher_variant3));
+
+ config::enable_write_index_searcher_cache = false;
+}
+
+TEST_F(InvertedIndexFileWriterTest, CacheUpdateTest) {
+ config::enable_write_index_searcher_cache = true;
+ io::FileWriterPtr file_writer;
+ std::string index_path =
+ InvertedIndexDescriptor::get_index_file_path_v2(_index_path_prefix
+ "_update");
+ io::FileWriterOptions opts;
+ Status st = _fs->create_file(index_path, &file_writer, &opts);
+ ASSERT_TRUE(st.ok());
+ MockInvertedIndexFileWriter writer(_fs, _index_path_prefix + "_update",
"rowset_update", 3,
+ InvertedIndexStorageFormatPB::V2,
std::move(file_writer));
+
+ int64_t index_id = 3;
+ std::string index_suffix = "suffix3";
+ auto index_meta = create_mock_tablet_index(index_id, index_suffix);
+ ASSERT_NE(index_meta, nullptr);
+ auto open_result = writer.open(index_meta.get());
+ ASSERT_TRUE(open_result.has_value());
+ auto dir = open_result.value();
+ auto out_file = std::unique_ptr<lucene::store::IndexOutput>(
+ dir->createOutput("write_v2_test_index_update"));
+ out_file->writeString("test_update");
+ out_file->close();
+ dir->close();
+ auto mock_builder = std::make_unique<MockIndexSearcherBuilder>();
+
+ EXPECT_CALL(*mock_builder, get_index_searcher(testing::_))
+ .WillOnce(testing::Invoke(
+ [](lucene::store::Directory* directory) ->
Result<IndexSearcherPtr> {
+ auto close_directory = true;
+ auto bkd_reader =
std::make_shared<lucene::util::bkd::bkd_reader>(
+ directory, close_directory);
+ _CLDECDELETE(directory)
+ return bkd_reader;
+ }));
+
+ EXPECT_CALL(writer, _construct_index_searcher_builder(testing::_))
+
.WillOnce(testing::Return(testing::ByMove(std::move(mock_builder))));
+
+ Status close_status = writer.close();
+ ASSERT_TRUE(close_status.ok());
+
+ auto index_file_key = InvertedIndexDescriptor::get_index_file_cache_key(
+ _index_path_prefix + "_update", index_id, index_suffix);
+ InvertedIndexSearcherCache::CacheKey searcher_cache_key(index_file_key);
+ InvertedIndexCacheHandle cache_handle;
+ bool found = _inverted_index_searcher_cache->lookup(searcher_cache_key,
&cache_handle);
+ ASSERT_TRUE(found);
+
+ io::FileWriterPtr file_writer_new;
+ Status st_new = _fs->create_file(index_path, &file_writer_new, &opts);
+ ASSERT_TRUE(st_new.ok());
+ MockInvertedIndexFileWriter writer_new(_fs, _index_path_prefix +
"_update", "rowset_update", 3,
+ InvertedIndexStorageFormatPB::V2,
+ std::move(file_writer_new));
+
+ auto open_result_new = writer_new.open(index_meta.get());
+ ASSERT_TRUE(open_result_new.has_value());
+ auto dir_new = open_result_new.value();
+ auto out_file_new = std::unique_ptr<lucene::store::IndexOutput>(
+ dir_new->createOutput("write_v2_test_index_update_new"));
+ out_file_new->writeString("test_update_new");
+ out_file_new->close();
+ dir_new->close();
+ auto mock_builder_new = std::make_unique<MockIndexSearcherBuilder>();
+
+ EXPECT_CALL(*mock_builder_new, get_index_searcher(testing::_))
+ .WillOnce(testing::Invoke(
+ [](lucene::store::Directory* directory) ->
Result<IndexSearcherPtr> {
+ auto close_directory = true;
+ auto bkd_reader =
std::make_shared<lucene::util::bkd::bkd_reader>(
+ directory, close_directory);
+ _CLDECDELETE(directory)
+ return bkd_reader;
+ }));
+
+ EXPECT_CALL(writer_new, _construct_index_searcher_builder(testing::_))
+
.WillOnce(testing::Return(testing::ByMove(std::move(mock_builder_new))));
+
+ Status close_status_new = writer_new.close();
+ ASSERT_TRUE(close_status_new.ok());
+
+ InvertedIndexCacheHandle cache_handle_new;
+ bool found_new =
_inverted_index_searcher_cache->lookup(searcher_cache_key, &cache_handle_new);
+ ASSERT_TRUE(found_new);
+ EXPECT_NE(cache_handle.get_index_cache_value()->index_searcher,
+ cache_handle_new.get_index_cache_value()->index_searcher);
+
+ config::enable_write_index_searcher_cache = false;
+}
+
+TEST_F(InvertedIndexFileWriterTest, AddIntoSearcherCacheV1Test) {
+ config::enable_write_index_searcher_cache = true;
+ io::FileWriterPtr file_writer;
+ std::string index_suffix = "suffix_v1";
+ std::string index_path = InvertedIndexDescriptor::get_index_file_path_v1(
+ _index_path_prefix + "_v1", 4, index_suffix);
+ io::FileWriterOptions opts;
+ Status st = _fs->create_file(index_path, &file_writer, &opts);
+ ASSERT_TRUE(st.ok());
+ MockInvertedIndexFileWriter writer(_fs, _index_path_prefix + "_v1",
"rowset_v1", 4,
+ InvertedIndexStorageFormatPB::V1,
std::move(file_writer));
+ int64_t index_id = 4;
+ auto index_meta = create_mock_tablet_index(index_id, index_suffix);
+ ASSERT_NE(index_meta, nullptr);
+ auto open_result = writer.open(index_meta.get());
+ ASSERT_TRUE(open_result.has_value());
+ auto dir = open_result.value();
+ auto out_file = std::unique_ptr<lucene::store::IndexOutput>(
+ dir->createOutput("add_into_searcher_cache_v1_test"));
+ out_file->writeString("test_v1");
+ out_file->close();
+ dir->close();
+ auto mock_builder = std::make_unique<MockIndexSearcherBuilder>();
+
+ EXPECT_CALL(*mock_builder, get_index_searcher(testing::_))
+ .WillOnce(testing::Invoke(
+ [](lucene::store::Directory* directory) ->
Result<IndexSearcherPtr> {
+ auto close_directory = true;
+ auto bkd_reader =
std::make_shared<lucene::util::bkd::bkd_reader>(
+ directory, close_directory);
+ _CLDECDELETE(directory)
+ return bkd_reader;
+ }));
+ EXPECT_CALL(writer, _construct_index_searcher_builder(testing::_))
+
.WillOnce(testing::Return(testing::ByMove(std::move(mock_builder))));
+ Status close_status = writer.close();
+ ASSERT_TRUE(close_status.ok());
+
+ auto index_file_key = InvertedIndexDescriptor::get_index_file_cache_key(
+ _index_path_prefix + "_v1", index_id, index_suffix);
+ InvertedIndexSearcherCache::CacheKey searcher_cache_key(index_file_key);
+ InvertedIndexCacheHandle cache_handle;
+ bool found = _inverted_index_searcher_cache->lookup(searcher_cache_key,
&cache_handle);
+ ASSERT_TRUE(found);
+ auto* cache_value_use_cache = cache_handle.get_index_cache_value();
+ EXPECT_GE(UnixMillis(), cache_value_use_cache->last_visit_time);
+ auto searcher_variant = cache_value_use_cache->index_searcher;
+ EXPECT_TRUE(std::holds_alternative<BKDIndexSearcherPtr>(searcher_variant));
+ config::enable_write_index_searcher_cache = false;
+}
+
+} // namespace doris::segment_v2
diff --git a/regression-test/pipeline/p0/conf/be.conf b/regression-test/pipeline/p0/conf/be.conf
index 5627f048905..a122b3750af 100644
--- a/regression-test/pipeline/p0/conf/be.conf
+++ b/regression-test/pipeline/p0/conf/be.conf
@@ -80,3 +80,4 @@ large_cumu_compaction_task_min_thread_num=3
# This feature has bug, so by default is false, only open it in pipeline to observe
enable_parquet_page_index=true
+enable_write_index_searcher_cache=true
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]