This is an automated email from the ASF dual-hosted git repository.
jianliangqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 3acf026172 [Enhancement](inverted index) add bkd index query cache to
improve perf (#23952)
3acf026172 is described below
commit 3acf0261724af7add13ac52269d19eac2e192f18
Author: airborne12 <[email protected]>
AuthorDate: Thu Sep 7 10:24:27 2023 +0800
[Enhancement](inverted index) add bkd index query cache to improve perf
(#23952)
Use the inverted index query cache to accelerate BKD index queries, especially
under high concurrency.
---
.../olap/rowset/segment_v2/inverted_index_cache.h | 5 +-
.../rowset/segment_v2/inverted_index_reader.cpp | 105 +++++++++++++--------
.../olap/rowset/segment_v2/inverted_index_reader.h | 13 ++-
3 files changed, 77 insertions(+), 46 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_cache.h
b/be/src/olap/rowset/segment_v2/inverted_index_cache.h
index 388ee02ee9..c67e17ddda 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_cache.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_cache.h
@@ -199,7 +199,7 @@ public:
io::Path index_path; // index file path
std::string column_name; // column name
InvertedIndexQueryType query_type; // query type
- std::wstring value; // query value
+ std::string value; // query value
// Encode to a flat binary which can be used as LRUCache's key
std::string encode() const {
@@ -213,8 +213,7 @@ public:
}
key_buf.append(query_type_str);
key_buf.append("/");
- auto str = lucene_wcstoutf8string(value.c_str(), value.length());
- key_buf.append(str);
+ key_buf.append(value);
return key_buf;
}
};
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
index c2a622699a..a521da394c 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
@@ -53,7 +53,6 @@
#include "io/fs/file_system.h"
#include "olap/key_coder.h"
#include "olap/olap_common.h"
-#include "olap/rowset/segment_v2/inverted_index_cache.h"
#include "olap/rowset/segment_v2/inverted_index_compound_directory.h"
#include "olap/rowset/segment_v2/inverted_index_desc.h"
#include "olap/types.h"
@@ -178,7 +177,7 @@ Status
InvertedIndexReader::read_null_bitmap(InvertedIndexQueryCacheHandle* cach
_index_meta.index_id());
auto index_file_path = index_dir / index_file_name;
InvertedIndexQueryCache::CacheKey cache_key {
- index_file_path, "", InvertedIndexQueryType::UNKNOWN_QUERY,
L"null_bitmap"};
+ index_file_path, "", InvertedIndexQueryType::UNKNOWN_QUERY,
"null_bitmap"};
auto cache = InvertedIndexQueryCache::instance();
if (cache->lookup(cache_key, cache_handle)) {
return Status::OK();
@@ -331,9 +330,9 @@ Status FullTextIndexReader::query(OlapReaderStatistics*
stats, const std::string
roaring::Roaring query_match_bitmap;
bool null_bitmap_already_read = false;
if (query_type == InvertedIndexQueryType::MATCH_PHRASE_QUERY) {
- std::wstring str_tokens;
+ std::wstring wstr_tokens;
for (auto& token : analyse_result) {
- str_tokens += token;
+ wstr_tokens += token;
}
auto cache = InvertedIndexQueryCache::instance();
@@ -341,6 +340,7 @@ Status FullTextIndexReader::query(OlapReaderStatistics*
stats, const std::string
cache_key.index_path = index_file_path;
cache_key.column_name = column_name;
cache_key.query_type = InvertedIndexQueryType::MATCH_PHRASE_QUERY;
+ auto str_tokens = lucene_wcstoutf8string(wstr_tokens.c_str(),
wstr_tokens.length());
cache_key.value.swap(str_tokens);
InvertedIndexQueryCacheHandle cache_handle;
std::shared_ptr<roaring::Roaring> term_match_bitmap = nullptr;
@@ -375,9 +375,10 @@ Status FullTextIndexReader::query(OlapReaderStatistics*
stats, const std::string
// try to get term bitmap match result from cache to avoid
query index on cache hit
auto cache = InvertedIndexQueryCache::instance();
// use EQUAL_QUERY type here since cache is for each term/token
- InvertedIndexQueryCache::CacheKey cache_key {index_file_path,
column_name,
-
InvertedIndexQueryType::EQUAL_QUERY,
- token_ws};
+ auto token = lucene_wcstoutf8string(token_ws.c_str(),
token_ws.length());
+
+ InvertedIndexQueryCache::CacheKey cache_key {
+ index_file_path, column_name,
InvertedIndexQueryType::EQUAL_QUERY, token};
VLOG_DEBUG << "cache_key:" << cache_key.encode();
InvertedIndexQueryCacheHandle cache_handle;
if (cache->lookup(cache_key, &cache_handle)) {
@@ -474,7 +475,7 @@ Status
StringTypeInvertedIndexReader::query(OlapReaderStatistics* stats,
// try to get query bitmap result from cache and return immediately on
cache hit
InvertedIndexQueryCache::CacheKey cache_key {index_file_path, column_name,
query_type,
- search_str_ws};
+ search_str};
auto cache = InvertedIndexQueryCache::instance();
InvertedIndexQueryCacheHandle cache_handle;
if (cache->lookup(cache_key, &cache_handle)) {
@@ -593,6 +594,7 @@ BkdIndexReader::BkdIndexReader(io::FileSystemSPtr fs, const
std::string& path,
LOG(WARNING) << "bkd index: " << index_file.string() << " not exist.";
return;
}
+ _file_full_path = index_file;
_compoundReader = std::make_unique<DorisCompoundReader>(
DorisCompoundDirectory::getDirectory(fs, index_dir.c_str()),
index_file_name.c_str(),
config::inverted_index_read_buffer_size);
@@ -606,9 +608,8 @@ Status BkdIndexReader::new_iterator(OlapReaderStatistics*
stats,
Status BkdIndexReader::bkd_query(OlapReaderStatistics* stats, const
std::string& column_name,
const void* query_value,
InvertedIndexQueryType query_type,
-
std::shared_ptr<lucene::util::bkd::bkd_reader>& r,
+
std::shared_ptr<lucene::util::bkd::bkd_reader> r,
InvertedIndexVisitor* visitor) {
- RETURN_IF_ERROR(get_bkd_reader(r));
char tmp[r->bytes_per_dim_];
switch (query_type) {
case InvertedIndexQueryType::EQUAL_QUERY: {
@@ -643,6 +644,20 @@ Status BkdIndexReader::try_query(OlapReaderStatistics*
stats, const std::string&
uint32_t* count) {
auto visitor = std::make_unique<InvertedIndexVisitor>(nullptr, query_type,
true);
std::shared_ptr<lucene::util::bkd::bkd_reader> r;
+ RETURN_IF_ERROR(get_bkd_reader(&r));
+ std::string query_str;
+ _value_key_coder->full_encode_ascending(query_value, &query_str);
+
+ InvertedIndexQueryCache::CacheKey cache_key {_file_full_path, column_name,
query_type,
+ query_str};
+ auto cache = InvertedIndexQueryCache::instance();
+ InvertedIndexQueryCacheHandle cache_handler;
+ roaring::Roaring bit_map;
+ auto cache_status = handle_cache(cache, cache_key, &cache_handler, stats,
&bit_map);
+ if (cache_status.ok()) {
+ *count = bit_map.cardinality();
+ return Status::OK();
+ }
try {
auto st = bkd_query(stats, column_name, query_value, query_type, r,
visitor.get());
if (!st.ok()) {
@@ -662,34 +677,42 @@ Status BkdIndexReader::try_query(OlapReaderStatistics*
stats, const std::string&
return Status::OK();
}
+Status BkdIndexReader::handle_cache(InvertedIndexQueryCache* cache,
+ const InvertedIndexQueryCache::CacheKey&
cache_key,
+ InvertedIndexQueryCacheHandle*
cache_handler,
+ OlapReaderStatistics* stats,
roaring::Roaring* bit_map) {
+ if (cache->lookup(cache_key, cache_handler)) {
+ stats->inverted_index_query_cache_hit++;
+ SCOPED_RAW_TIMER(&stats->inverted_index_query_bitmap_copy_timer);
+ *bit_map = *cache_handler->get_bitmap();
+ return Status::OK();
+ } else {
+ stats->inverted_index_query_cache_miss++;
+ return Status::Error<ErrorCode::KEY_NOT_FOUND>("cache miss");
+ }
+}
+
Status BkdIndexReader::query(OlapReaderStatistics* stats, const std::string&
column_name,
const void* query_value, InvertedIndexQueryType
query_type,
roaring::Roaring* bit_map) {
SCOPED_RAW_TIMER(&stats->inverted_index_query_timer);
- io::Path path(_path);
- auto index_dir = path.parent_path();
- auto index_file_name =
- InvertedIndexDescriptor::get_index_file_name(path.filename(),
_index_meta.index_id());
- auto index_file_path = index_dir / index_file_name;
- // std::string query_str {(const char *)query_value};
-
- // // try to get query bitmap result from cache and return immediately on
cache hit
- // InvertedIndexQueryCache::CacheKey cache_key
- // {index_file_path, column_name, query_type,
std::wstring(query_str.begin(), query_str.end())};
- // auto cache = InvertedIndexQueryCache::instance();
- // InvertedIndexQueryCacheHandle cache_handle;
- // if (cache->lookup(cache_key, &cache_handle)) {
- // stats->inverted_index_query_cache_hit++;
- // SCOPED_RAW_TIMER(&stats->inverted_index_query_bitmap_copy_timer);
- // *bit_map = *cache_handle.match_bitmap();
- // return Status::OK();
- // } else {
- // stats->inverted_index_query_cache_miss++;
- // }
-
auto visitor = std::make_unique<InvertedIndexVisitor>(bit_map, query_type);
std::shared_ptr<lucene::util::bkd::bkd_reader> r;
+ RETURN_IF_ERROR(get_bkd_reader(&r));
+
+ std::string query_str;
+ _value_key_coder->full_encode_ascending(query_value, &query_str);
+
+ InvertedIndexQueryCache::CacheKey cache_key {_file_full_path, column_name,
query_type,
+ query_str};
+ auto cache = InvertedIndexQueryCache::instance();
+ InvertedIndexQueryCacheHandle cache_handler;
+ auto cache_status = handle_cache(cache, cache_key, &cache_handler, stats,
bit_map);
+ if (cache_status.ok()) {
+ return Status::OK();
+ }
+
try {
auto st = bkd_query(stats, column_name, query_value, query_type, r,
visitor.get());
if (!st.ok()) {
@@ -705,17 +728,17 @@ Status BkdIndexReader::query(OlapReaderStatistics* stats,
const std::string& col
"BKD Query CLuceneError Occurred, error msg: {}", e.what());
}
- // // add to cache
- // roaring::Roaring* term_match_bitmap = new roaring::Roaring(*bit_map);
- // term_match_bitmap->runOptimize();
- // cache->insert(cache_key, term_match_bitmap, &cache_handle);
+ std::shared_ptr<roaring::Roaring> query_bitmap =
std::make_shared<roaring::Roaring>(*bit_map);
+ query_bitmap->runOptimize();
+ cache->insert(cache_key, query_bitmap, &cache_handler);
VLOG_DEBUG << "BKD index search column: " << column_name
<< " result: " << bit_map->cardinality();
+
return Status::OK();
}
-Status
BkdIndexReader::get_bkd_reader(std::shared_ptr<lucene::util::bkd::bkd_reader>&
bkdReader) {
+Status
BkdIndexReader::get_bkd_reader(std::shared_ptr<lucene::util::bkd::bkd_reader>*
bkdReader) {
// bkd file reader
if (_compoundReader == nullptr) {
return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
@@ -739,18 +762,18 @@ Status
BkdIndexReader::get_bkd_reader(std::shared_ptr<lucene::util::bkd::bkd_rea
err.what());
}
- bkdReader =
std::make_shared<lucene::util::bkd::bkd_reader>(data_in.release());
- if (0 == bkdReader->read_meta(meta_in.get())) {
+ *bkdReader =
std::make_shared<lucene::util::bkd::bkd_reader>(data_in.release());
+ if (0 == (*bkdReader)->read_meta(meta_in.get())) {
VLOG_NOTICE << "bkd index file is empty:" <<
_compoundReader->toString();
return Status::EndOfFile("bkd index file is empty");
}
- bkdReader->read_index(index_in.get());
+ (*bkdReader)->read_index(index_in.get());
- _type_info = get_scalar_type_info((FieldType)bkdReader->type);
+ _type_info = get_scalar_type_info((FieldType)(*bkdReader)->type);
if (_type_info == nullptr) {
return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
- "unsupported typeinfo, type={}", bkdReader->type);
+ "unsupported typeinfo, type={}", (*bkdReader)->type);
}
_value_key_coder = get_key_coder(_type_info->type());
return Status::OK();
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.h
b/be/src/olap/rowset/segment_v2/inverted_index_reader.h
index c076756805..5f7b318825 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.h
@@ -29,6 +29,7 @@
#include "io/fs/file_system.h"
#include "io/fs/path.h"
#include "olap/inverted_index_parser.h"
+#include "olap/rowset/segment_v2/inverted_index_cache.h"
#include "olap/rowset/segment_v2/inverted_index_compound_reader.h"
#include "olap/rowset/segment_v2/inverted_index_query_type.h"
#include "olap/tablet_schema.h"
@@ -192,6 +193,9 @@ public:
class BkdIndexReader : public InvertedIndexReader {
ENABLE_FACTORY_CREATOR(BkdIndexReader);
+private:
+ std::string _file_full_path;
+
public:
explicit BkdIndexReader(io::FileSystemSPtr fs, const std::string& path,
const TabletIndex* index_meta);
@@ -221,11 +225,16 @@ public:
uint32_t* count) override;
Status bkd_query(OlapReaderStatistics* stats, const std::string&
column_name,
const void* query_value, InvertedIndexQueryType
query_type,
- std::shared_ptr<lucene::util::bkd::bkd_reader>& r,
+ std::shared_ptr<lucene::util::bkd::bkd_reader> r,
InvertedIndexVisitor* visitor);
+ Status handle_cache(InvertedIndexQueryCache* cache,
+ const InvertedIndexQueryCache::CacheKey& cache_key,
+ InvertedIndexQueryCacheHandle* cache_handler,
OlapReaderStatistics* stats,
+ roaring::Roaring* bit_map);
+
InvertedIndexReaderType type() override;
- Status get_bkd_reader(std::shared_ptr<lucene::util::bkd::bkd_reader>&
reader);
+ Status get_bkd_reader(std::shared_ptr<lucene::util::bkd::bkd_reader>*
reader);
private:
const TypeInfo* _type_info {};
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]