This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 3d431452995 [opt](inverted index) enhance inverted index profile
(#50876)
3d431452995 is described below
commit 3d4314529951e3a0850f3e59f3180faa2e6498cc
Author: zzzxl <[email protected]>
AuthorDate: Fri Jun 27 17:26:05 2025 +0800
[opt](inverted index) enhance inverted index profile (#50876)
pick https://github.com/apache/doris/pull/47504
pick https://github.com/apache/doris/pull/48826
pick https://github.com/apache/doris/pull/48950
---
be/src/clucene | 2 +-
be/src/io/cache/block_file_cache_profile.h | 40 +++++++-
be/src/io/cache/cached_remote_file_reader.cpp | 15 ++-
be/src/io/io_common.h | 9 +-
be/src/olap/inverted_index_profile.h | 57 ++++++++++++
.../prefix_query.h => inverted_index_stats.h} | 26 ++----
be/src/olap/olap_common.h | 4 +
.../inverted_index/query/phrase_edge_query.cpp | 3 +-
.../inverted_index/query/phrase_edge_query.h | 1 +
.../inverted_index/query/phrase_prefix_query.cpp | 8 +-
.../inverted_index/query/phrase_prefix_query.h | 3 +
.../inverted_index/query/prefix_query.cpp | 4 +-
.../segment_v2/inverted_index/query/prefix_query.h | 17 ++--
.../inverted_index/query/regexp_query.cpp | 3 +-
.../segment_v2/inverted_index/query/regexp_query.h | 1 +
.../segment_v2/inverted_index_fs_directory.cpp | 57 +++++++-----
.../segment_v2/inverted_index_fs_directory.h | 2 +
.../rowset/segment_v2/inverted_index_reader.cpp | 63 +++++++++----
.../olap/rowset/segment_v2/inverted_index_reader.h | 12 ++-
be/src/pipeline/exec/olap_scan_operator.cpp | 8 ++
be/src/pipeline/exec/olap_scan_operator.h | 3 +
be/src/vec/exec/scan/new_olap_scanner.cpp | 8 ++
be/test/olap/inverted_index_profile_test.cpp | 44 +++++++++
.../compaction/util/index_compaction_utils.cpp | 2 +-
.../inverted_index_fs_directory_test.cpp | 102 +++++++++++++++++++++
25 files changed, 404 insertions(+), 90 deletions(-)
diff --git a/be/src/clucene b/be/src/clucene
index 317e5071488..569398a5c96 160000
--- a/be/src/clucene
+++ b/be/src/clucene
@@ -1 +1 @@
-Subproject commit 317e50714884fe338e0f44e870d841df3a91689f
+Subproject commit 569398a5c96b4c626251ccbe81257945a3d2aef4
diff --git a/be/src/io/cache/block_file_cache_profile.h
b/be/src/io/cache/block_file_cache_profile.h
index f9d9df0939f..583e1287c05 100644
--- a/be/src/io/cache/block_file_cache_profile.h
+++ b/be/src/io/cache/block_file_cache_profile.h
@@ -75,7 +75,6 @@ struct FileCacheProfile {
struct FileCacheProfileReporter {
RuntimeProfile::Counter* num_local_io_total = nullptr;
RuntimeProfile::Counter* num_remote_io_total = nullptr;
- RuntimeProfile::Counter* num_inverted_index_remote_io_total = nullptr;
RuntimeProfile::Counter* local_io_timer = nullptr;
RuntimeProfile::Counter* bytes_scanned_from_cache = nullptr;
RuntimeProfile::Counter* bytes_scanned_from_remote = nullptr;
@@ -89,6 +88,14 @@ struct FileCacheProfileReporter {
RuntimeProfile::Counter* get_timer = nullptr;
RuntimeProfile::Counter* set_timer = nullptr;
+ RuntimeProfile::Counter* inverted_index_num_local_io_total = nullptr;
+ RuntimeProfile::Counter* inverted_index_num_remote_io_total = nullptr;
+ RuntimeProfile::Counter* inverted_index_bytes_scanned_from_cache = nullptr;
+ RuntimeProfile::Counter* inverted_index_bytes_scanned_from_remote =
nullptr;
+ RuntimeProfile::Counter* inverted_index_local_io_timer = nullptr;
+ RuntimeProfile::Counter* inverted_index_remote_io_timer = nullptr;
+ RuntimeProfile::Counter* inverted_index_io_timer = nullptr;
+
FileCacheProfileReporter(RuntimeProfile* profile) {
static const char* cache_profile = "FileCache";
ADD_TIMER_WITH_LEVEL(profile, cache_profile, 1);
@@ -96,8 +103,6 @@ struct FileCacheProfileReporter {
cache_profile, 1);
num_remote_io_total = ADD_CHILD_COUNTER_WITH_LEVEL(profile,
"NumRemoteIOTotal", TUnit::UNIT,
cache_profile, 1);
- num_inverted_index_remote_io_total = ADD_CHILD_COUNTER_WITH_LEVEL(
- profile, "NumInvertedIndexRemoteIOTotal", TUnit::UNIT,
cache_profile, 1);
local_io_timer = ADD_CHILD_TIMER_WITH_LEVEL(profile,
"LocalIOUseTimer", cache_profile, 1);
remote_io_timer = ADD_CHILD_TIMER_WITH_LEVEL(profile,
"RemoteIOUseTimer", cache_profile, 1);
write_cache_io_timer =
@@ -117,13 +122,26 @@ struct FileCacheProfileReporter {
lock_wait_timer = ADD_CHILD_TIMER_WITH_LEVEL(profile, "LockWaitTimer",
cache_profile, 1);
get_timer = ADD_CHILD_TIMER_WITH_LEVEL(profile, "GetTimer",
cache_profile, 1);
set_timer = ADD_CHILD_TIMER_WITH_LEVEL(profile, "SetTimer",
cache_profile, 1);
+
+ inverted_index_num_local_io_total = ADD_CHILD_COUNTER_WITH_LEVEL(
+ profile, "InvertedIndexNumLocalIOTotal", TUnit::UNIT,
cache_profile, 1);
+ inverted_index_num_remote_io_total = ADD_CHILD_COUNTER_WITH_LEVEL(
+ profile, "InvertedIndexNumRemoteIOTotal", TUnit::UNIT,
cache_profile, 1);
+ inverted_index_bytes_scanned_from_cache = ADD_CHILD_COUNTER_WITH_LEVEL(
+ profile, "InvertedIndexBytesScannedFromCache", TUnit::BYTES,
cache_profile, 1);
+ inverted_index_bytes_scanned_from_remote =
ADD_CHILD_COUNTER_WITH_LEVEL(
+ profile, "InvertedIndexBytesScannedFromRemote", TUnit::BYTES,
cache_profile, 1);
+ inverted_index_local_io_timer = ADD_CHILD_TIMER_WITH_LEVEL(
+ profile, "InvertedIndexLocalIOUseTimer", cache_profile, 1);
+ inverted_index_remote_io_timer = ADD_CHILD_TIMER_WITH_LEVEL(
+ profile, "InvertedIndexRemoteIOUseTimer", cache_profile, 1);
+ inverted_index_io_timer =
+ ADD_CHILD_TIMER_WITH_LEVEL(profile, "InvertedIndexIOTimer",
cache_profile, 1);
}
void update(const FileCacheStatistics* statistics) const {
COUNTER_UPDATE(num_local_io_total, statistics->num_local_io_total);
COUNTER_UPDATE(num_remote_io_total, statistics->num_remote_io_total);
- COUNTER_UPDATE(num_inverted_index_remote_io_total,
- statistics->num_inverted_index_remote_io_total);
COUNTER_UPDATE(local_io_timer, statistics->local_io_timer);
COUNTER_UPDATE(remote_io_timer, statistics->remote_io_timer);
COUNTER_UPDATE(write_cache_io_timer, statistics->write_cache_io_timer);
@@ -136,6 +154,18 @@ struct FileCacheProfileReporter {
COUNTER_UPDATE(lock_wait_timer, statistics->lock_wait_timer);
COUNTER_UPDATE(get_timer, statistics->get_timer);
COUNTER_UPDATE(set_timer, statistics->set_timer);
+
+ COUNTER_UPDATE(inverted_index_num_local_io_total,
+ statistics->inverted_index_num_local_io_total);
+ COUNTER_UPDATE(inverted_index_num_remote_io_total,
+ statistics->inverted_index_num_remote_io_total);
+ COUNTER_UPDATE(inverted_index_bytes_scanned_from_cache,
+ statistics->inverted_index_bytes_read_from_local);
+ COUNTER_UPDATE(inverted_index_bytes_scanned_from_remote,
+ statistics->inverted_index_bytes_read_from_remote);
+ COUNTER_UPDATE(inverted_index_local_io_timer,
statistics->inverted_index_local_io_timer);
+ COUNTER_UPDATE(inverted_index_remote_io_timer,
statistics->inverted_index_remote_io_timer);
+ COUNTER_UPDATE(inverted_index_io_timer,
statistics->inverted_index_io_timer);
}
};
diff --git a/be/src/io/cache/cached_remote_file_reader.cpp
b/be/src/io/cache/cached_remote_file_reader.cpp
index c7476b7ab74..b89bdcf2f6d 100644
--- a/be/src/io/cache/cached_remote_file_reader.cpp
+++ b/be/src/io/cache/cached_remote_file_reader.cpp
@@ -339,9 +339,6 @@ void CachedRemoteFileReader::_update_stats(const
ReadStatistics& read_stats,
statis->num_local_io_total++;
statis->bytes_read_from_local += read_stats.bytes_read;
} else {
- if (is_inverted_index) {
- statis->num_inverted_index_remote_io_total++;
- }
statis->num_remote_io_total++;
statis->bytes_read_from_remote += read_stats.bytes_read;
}
@@ -357,6 +354,18 @@ void CachedRemoteFileReader::_update_stats(const
ReadStatistics& read_stats,
statis->get_timer += read_stats.get_timer;
statis->set_timer += read_stats.set_timer;
+ if (is_inverted_index) {
+ if (read_stats.hit_cache) {
+ statis->inverted_index_num_local_io_total++;
+ statis->inverted_index_bytes_read_from_local +=
read_stats.bytes_read;
+ } else {
+ statis->inverted_index_num_remote_io_total++;
+ statis->inverted_index_bytes_read_from_remote +=
read_stats.bytes_read;
+ }
+ statis->inverted_index_local_io_timer += read_stats.local_read_timer;
+ statis->inverted_index_remote_io_timer += read_stats.remote_read_timer;
+ }
+
g_skip_cache_num << read_stats.skip_cache;
g_skip_cache_sum << read_stats.skip_cache;
}
diff --git a/be/src/io/io_common.h b/be/src/io/io_common.h
index d4a4e26a7c1..909941181d3 100644
--- a/be/src/io/io_common.h
+++ b/be/src/io/io_common.h
@@ -38,7 +38,6 @@ namespace io {
struct FileCacheStatistics {
int64_t num_local_io_total = 0;
int64_t num_remote_io_total = 0;
- int64_t num_inverted_index_remote_io_total = 0;
int64_t local_io_timer = 0;
int64_t bytes_read_from_local = 0;
int64_t bytes_read_from_remote = 0;
@@ -51,6 +50,14 @@ struct FileCacheStatistics {
int64_t lock_wait_timer = 0;
int64_t get_timer = 0;
int64_t set_timer = 0;
+
+ int64_t inverted_index_num_local_io_total = 0;
+ int64_t inverted_index_num_remote_io_total = 0;
+ int64_t inverted_index_bytes_read_from_local = 0;
+ int64_t inverted_index_bytes_read_from_remote = 0;
+ int64_t inverted_index_local_io_timer = 0;
+ int64_t inverted_index_remote_io_timer = 0;
+ int64_t inverted_index_io_timer = 0;
};
struct IOContext {
diff --git a/be/src/olap/inverted_index_profile.h
b/be/src/olap/inverted_index_profile.h
new file mode 100644
index 00000000000..4b3855b5c8a
--- /dev/null
+++ b/be/src/olap/inverted_index_profile.h
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "olap/inverted_index_stats.h"
+#include "util/runtime_profile.h"
+
+namespace doris {
+
+class InvertedIndexProfileReporter {
+public:
+ InvertedIndexProfileReporter() = default;
+ ~InvertedIndexProfileReporter() = default;
+
+ void update(RuntimeProfile* profile, const InvertedIndexStatistics*
statistics) {
+ // Determine the iteration limit: the smaller of 20 or the size of
statistics->stats
+ size_t iteration_limit = std::min<size_t>(20,
statistics->stats.size());
+
+ for (size_t i = 0; i < iteration_limit; ++i) {
+ const auto& stats = statistics->stats[i];
+
+ ADD_TIMER_WITH_LEVEL(profile, hit_rows_name, 1);
+ auto* hit_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "HitRows_"
+ stats.column_name,
+ TUnit::UNIT,
hit_rows_name, 1);
+ COUNTER_UPDATE(hit_rows, stats.hit_rows);
+
+ ADD_TIMER_WITH_LEVEL(profile, exec_time_name, 1);
+ auto* exec_time = ADD_CHILD_COUNTER_WITH_LEVEL(profile,
"ExecTime_" + stats.column_name,
+ TUnit::TIME_NS,
exec_time_name, 1);
+ COUNTER_UPDATE(exec_time, stats.exec_time);
+ }
+ }
+
+private:
+ static constexpr const char* hit_rows_name = "HitRows";
+ static constexpr const char* exec_time_name = "ExecTime";
+};
+
+} // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query/prefix_query.h
b/be/src/olap/inverted_index_stats.h
similarity index 60%
copy from be/src/olap/rowset/segment_v2/inverted_index/query/prefix_query.h
copy to be/src/olap/inverted_index_stats.h
index 9a33b13dd8d..b82b230f41d 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/query/prefix_query.h
+++ b/be/src/olap/inverted_index_stats.h
@@ -17,24 +17,18 @@
#pragma once
-#include <CLucene.h>
-#include <CLucene/index/IndexReader.h>
+#include <vector>
-#include <cstdint>
+namespace doris {
-CL_NS_USE(index)
-
-namespace doris::segment_v2 {
-
-class PrefixQuery {
-public:
- PrefixQuery() = default;
- virtual ~PrefixQuery() = default;
+struct InvertedIndexQueryStatistics {
+ std::string column_name;
+ int64_t hit_rows = 0;
+ int64_t exec_time = 0;
+};
- static void get_prefix_terms(IndexReader* reader, const std::wstring&
field_name,
- const std::string& prefix,
- std::vector<CL_NS(index)::Term*>&
prefix_terms,
- int32_t max_expansions = 50);
+struct InvertedIndexStatistics {
+ std::vector<InvertedIndexQueryStatistics> stats;
};
-} // namespace doris::segment_v2
\ No newline at end of file
+} // namespace doris
diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h
index 9ffac13022d..f792e3ac6f9 100644
--- a/be/src/olap/olap_common.h
+++ b/be/src/olap/olap_common.h
@@ -37,6 +37,7 @@
#include "common/config.h"
#include "io/io_common.h"
+#include "olap/inverted_index_stats.h"
#include "olap/olap_define.h"
#include "olap/rowset/rowset_fwd.h"
#include "util/hash_util.hpp"
@@ -373,9 +374,12 @@ struct OlapReaderStatistics {
int64_t inverted_index_query_bitmap_copy_timer = 0;
int64_t inverted_index_searcher_open_timer = 0;
int64_t inverted_index_searcher_search_timer = 0;
+ int64_t inverted_index_searcher_search_init_timer = 0;
+ int64_t inverted_index_searcher_search_exec_timer = 0;
int64_t inverted_index_searcher_cache_hit = 0;
int64_t inverted_index_searcher_cache_miss = 0;
int64_t inverted_index_downgrade_count = 0;
+ InvertedIndexStatistics inverted_index_stats;
int64_t output_index_result_column_timer = 0;
// number of segment filtered by column stat when creating seg iterator
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_edge_query.cpp
b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_edge_query.cpp
index f82433826e9..2df4aa5929b 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_edge_query.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_edge_query.cpp
@@ -32,6 +32,7 @@ namespace doris::segment_v2 {
PhraseEdgeQuery::PhraseEdgeQuery(const
std::shared_ptr<lucene::search::IndexSearcher>& searcher,
const TQueryOptions& query_options, const
io::IOContext* io_ctx)
: _searcher(searcher),
+ _io_ctx(io_ctx),
_query(std::make_unique<CL_NS(search)::MultiPhraseQuery>()),
_max_expansions(query_options.inverted_index_max_expansions) {}
@@ -143,7 +144,7 @@ void PhraseEdgeQuery::find_words(const
std::function<void(Term*)>& cb) {
Term* term = nullptr;
TermEnum* enumerator = nullptr;
try {
- enumerator = _searcher->getReader()->terms();
+ enumerator = _searcher->getReader()->terms(nullptr, _io_ctx);
while (enumerator->next()) {
term = enumerator->term();
cb(term);
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_edge_query.h
b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_edge_query.h
index 9eb3bd57c4a..bdf962c9f48 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_edge_query.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_edge_query.h
@@ -48,6 +48,7 @@ private:
private:
std::shared_ptr<lucene::search::IndexSearcher> _searcher;
+ const io::IOContext* _io_ctx = nullptr;
std::wstring _field_name;
std::vector<std::string> _terms;
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.cpp
b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.cpp
index 88bb3c1171f..ca45108008a 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.cpp
@@ -18,7 +18,6 @@
#include "phrase_prefix_query.h"
#include "CLucene/util/stringUtil.h"
-#include "olap/rowset//segment_v2/inverted_index/query/prefix_query.h"
namespace doris::segment_v2 {
@@ -27,7 +26,8 @@ PhrasePrefixQuery::PhrasePrefixQuery(const
std::shared_ptr<lucene::search::Index
const io::IOContext* io_ctx)
: _searcher(searcher),
_query(std::make_unique<CL_NS(search)::MultiPhraseQuery>()),
- _max_expansions(query_options.inverted_index_max_expansions) {}
+ _max_expansions(query_options.inverted_index_max_expansions),
+ _prefix_query(io_ctx) {}
void PhrasePrefixQuery::add(const std::wstring& field_name, const
std::vector<std::string>& terms) {
if (terms.empty()) {
@@ -42,8 +42,8 @@ void PhrasePrefixQuery::add(const std::wstring& field_name,
const std::vector<st
_CLLDECDELETE(t);
} else {
std::vector<CL_NS(index)::Term*> prefix_terms;
- PrefixQuery::get_prefix_terms(_searcher->getReader(), field_name,
terms[i],
- prefix_terms, _max_expansions);
+ _prefix_query.get_prefix_terms(_searcher->getReader(), field_name,
terms[i],
+ prefix_terms, _max_expansions);
if (prefix_terms.empty()) {
std::wstring ws_term = StringUtil::string_to_wstring(terms[i]);
Term* t = _CLNEW Term(field_name.c_str(), ws_term.c_str());
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.h
b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.h
index 5cac597951e..d813faf8340 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.h
@@ -24,6 +24,8 @@
#include "CLucene/search/MultiPhraseQuery.h"
// clang-format on
+#include "olap/rowset/segment_v2/inverted_index/query/prefix_query.h"
+
CL_NS_USE(search)
namespace doris::segment_v2 {
@@ -42,6 +44,7 @@ private:
std::unique_ptr<CL_NS(search)::MultiPhraseQuery> _query;
int32_t _max_expansions = 50;
+ PrefixQuery _prefix_query;
};
} // namespace doris::segment_v2
\ No newline at end of file
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/query/prefix_query.cpp
b/be/src/olap/rowset/segment_v2/inverted_index/query/prefix_query.cpp
index 14006227352..2ba7b94966c 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/query/prefix_query.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query/prefix_query.cpp
@@ -19,6 +19,8 @@
namespace doris::segment_v2 {
+PrefixQuery::PrefixQuery(const io::IOContext* io_ctx) : _io_ctx(io_ctx) {}
+
void PrefixQuery::get_prefix_terms(IndexReader* reader, const std::wstring&
field_name,
const std::string& prefix,
std::vector<CL_NS(index)::Term*>&
prefix_terms,
@@ -26,7 +28,7 @@ void PrefixQuery::get_prefix_terms(IndexReader* reader, const
std::wstring& fiel
std::wstring ws_prefix = StringUtil::string_to_wstring(prefix);
Term* prefix_term = _CLNEW Term(field_name.c_str(), ws_prefix.c_str());
- TermEnum* enumerator = reader->terms(prefix_term);
+ TermEnum* enumerator = reader->terms(prefix_term, _io_ctx);
int32_t count = 0;
Term* lastTerm = nullptr;
diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query/prefix_query.h
b/be/src/olap/rowset/segment_v2/inverted_index/query/prefix_query.h
index 9a33b13dd8d..bff2719455c 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/query/prefix_query.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query/prefix_query.h
@@ -17,10 +17,7 @@
#pragma once
-#include <CLucene.h>
-#include <CLucene/index/IndexReader.h>
-
-#include <cstdint>
+#include "olap/rowset/segment_v2/inverted_index/query/query.h"
CL_NS_USE(index)
@@ -28,13 +25,15 @@ namespace doris::segment_v2 {
class PrefixQuery {
public:
- PrefixQuery() = default;
+ PrefixQuery(const io::IOContext* io_ctx);
virtual ~PrefixQuery() = default;
- static void get_prefix_terms(IndexReader* reader, const std::wstring&
field_name,
- const std::string& prefix,
- std::vector<CL_NS(index)::Term*>&
prefix_terms,
- int32_t max_expansions = 50);
+ void get_prefix_terms(IndexReader* reader, const std::wstring& field_name,
+ const std::string& prefix,
std::vector<CL_NS(index)::Term*>& prefix_terms,
+ int32_t max_expansions = 50);
+
+private:
+ const io::IOContext* _io_ctx = nullptr;
};
} // namespace doris::segment_v2
\ No newline at end of file
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/query/regexp_query.cpp
b/be/src/olap/rowset/segment_v2/inverted_index/query/regexp_query.cpp
index 69de4b7818b..5838e1c3737 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/query/regexp_query.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query/regexp_query.cpp
@@ -27,6 +27,7 @@ namespace doris::segment_v2 {
RegexpQuery::RegexpQuery(const std::shared_ptr<lucene::search::IndexSearcher>&
searcher,
const TQueryOptions& query_options, const
io::IOContext* io_ctx)
: _searcher(searcher),
+ _io_ctx(io_ctx),
_max_expansions(query_options.inverted_index_max_expansions),
_query(searcher, query_options, io_ctx) {}
@@ -66,7 +67,7 @@ void RegexpQuery::add(const std::wstring& field_name, const
std::vector<std::str
int32_t count = 0;
try {
- enumerator = _searcher->getReader()->terms();
+ enumerator = _searcher->getReader()->terms(nullptr, _io_ctx);
while (enumerator->next()) {
term = enumerator->term();
std::string input = lucene_wcstoutf8string(term->text(),
term->textLength());
diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query/regexp_query.h
b/be/src/olap/rowset/segment_v2/inverted_index/query/regexp_query.h
index 650ad2bf10b..782e492c473 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/query/regexp_query.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query/regexp_query.h
@@ -36,6 +36,7 @@ public:
private:
std::shared_ptr<lucene::search::IndexSearcher> _searcher;
+ const io::IOContext* _io_ctx = nullptr;
int32_t _max_expansions = 50;
DisjunctionQuery _query;
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp
b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp
index 759cc0b430b..c633d29a7fc 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp
@@ -237,35 +237,46 @@ void DorisFSDirectory::FSIndexInput::seekInternal(const
int64_t position) {
void DorisFSDirectory::FSIndexInput::readInternal(uint8_t* b, const int32_t
len) {
CND_PRECONDITION(_handle != nullptr, "shared file handle has closed");
CND_PRECONDITION(_handle->_reader != nullptr, "file is not open");
- std::lock_guard<std::mutex> wlock(_handle->_shared_lock);
- int64_t position = getFilePointer();
- if (_pos != position) {
- _pos = position;
- }
+ int64_t inverted_index_io_timer = 0;
+ {
+ SCOPED_RAW_TIMER(&inverted_index_io_timer);
+
+ std::lock_guard<std::mutex> wlock(_handle->_shared_lock);
+
+ int64_t position = getFilePointer();
+ if (_pos != position) {
+ _pos = position;
+ }
+
+ if (_handle->_fpos != _pos) {
+ _handle->_fpos = _pos;
+ }
- if (_handle->_fpos != _pos) {
+ Slice result {b, (size_t)len};
+ size_t bytes_read = 0;
+ Status st = _handle->_reader->read_at(_pos, result, &bytes_read,
&_io_ctx);
+
DBUG_EXECUTE_IF("DorisFSDirectory::FSIndexInput::readInternal_reader_read_at_error",
{
+ st = Status::InternalError(
+ "debug point: "
+
"DorisFSDirectory::FSIndexInput::readInternal_reader_read_at_error");
+ })
+ if (!st.ok()) {
+ _CLTHROWA(CL_ERR_IO, "read past EOF");
+ }
+ bufferLength = len;
+
DBUG_EXECUTE_IF("DorisFSDirectory::FSIndexInput::readInternal_bytes_read_error",
+ { bytes_read = len + 10; })
+ if (bytes_read != len) {
+ _CLTHROWA(CL_ERR_IO, "read error");
+ }
+ _pos += bufferLength;
_handle->_fpos = _pos;
}
- Slice result {b, (size_t)len};
- size_t bytes_read = 0;
- Status st = _handle->_reader->read_at(_pos, result, &bytes_read, &_io_ctx);
-
DBUG_EXECUTE_IF("DorisFSDirectory::FSIndexInput::readInternal_reader_read_at_error",
{
- st = Status::InternalError(
- "debug point:
DorisFSDirectory::FSIndexInput::readInternal_reader_read_at_error");
- })
- if (!st.ok()) {
- _CLTHROWA(CL_ERR_IO, "read past EOF");
- }
- bufferLength = len;
-
DBUG_EXECUTE_IF("DorisFSDirectory::FSIndexInput::readInternal_bytes_read_error",
- { bytes_read = len + 10; })
- if (bytes_read != len) {
- _CLTHROWA(CL_ERR_IO, "read error");
+ if (_io_ctx.file_cache_stats != nullptr) {
+ _io_ctx.file_cache_stats->inverted_index_io_timer +=
inverted_index_io_timer;
}
- _pos += bufferLength;
- _handle->_fpos = _pos;
}
void DorisFSDirectory::FSIndexOutput::init(const io::FileSystemSPtr& fs, const
char* path) {
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.h
b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.h
index 991ef52d046..0bba5b49756 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.h
@@ -209,6 +209,8 @@ protected:
void seekInternal(const int64_t position) override;
// IndexInput methods
void readInternal(uint8_t* b, const int32_t len) override;
+
+ friend class DorisFSDirectoryTest;
};
/**
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
index 6f53857156a..662c930ead2 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
@@ -256,8 +256,14 @@ Status InvertedIndexReader::match_index_search(
return Status::Error<ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>(
"query type " + query_type_to_string(query_type) + ",
query is nullptr");
}
- query->add(query_info);
- query->search(*term_match_bitmap);
+ {
+
SCOPED_RAW_TIMER(&stats->inverted_index_searcher_search_init_timer);
+ query->add(query_info);
+ }
+ {
+
SCOPED_RAW_TIMER(&stats->inverted_index_searcher_search_exec_timer);
+ query->search(*term_match_bitmap);
+ }
} catch (const CLuceneError& e) {
return
Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>("CLuceneError occured:
{}",
e.what());
@@ -542,7 +548,7 @@ Status BkdIndexReader::construct_bkd_query_value(const
void* query_value,
return Status::OK();
}
-Status BkdIndexReader::invoke_bkd_try_query(const void* query_value,
+Status BkdIndexReader::invoke_bkd_try_query(const io::IOContext* io_ctx, const
void* query_value,
InvertedIndexQueryType query_type,
std::shared_ptr<lucene::util::bkd::bkd_reader> r,
uint32_t* count) {
@@ -550,7 +556,7 @@ Status BkdIndexReader::invoke_bkd_try_query(const void*
query_value,
case InvertedIndexQueryType::LESS_THAN_QUERY: {
auto visitor =
std::make_unique<InvertedIndexVisitor<InvertedIndexQueryType::LESS_THAN_QUERY>>(
- r.get(), nullptr, true);
+ io_ctx, r.get(), nullptr, true);
RETURN_IF_ERROR(construct_bkd_query_value(query_value, r,
visitor.get()));
*count = r->estimate_point_count(visitor.get());
break;
@@ -558,7 +564,7 @@ Status BkdIndexReader::invoke_bkd_try_query(const void*
query_value,
case InvertedIndexQueryType::LESS_EQUAL_QUERY: {
auto visitor =
std::make_unique<InvertedIndexVisitor<InvertedIndexQueryType::LESS_EQUAL_QUERY>>(
- r.get(), nullptr, true);
+ io_ctx, r.get(), nullptr, true);
RETURN_IF_ERROR(construct_bkd_query_value(query_value, r,
visitor.get()));
*count = r->estimate_point_count(visitor.get());
break;
@@ -566,7 +572,7 @@ Status BkdIndexReader::invoke_bkd_try_query(const void*
query_value,
case InvertedIndexQueryType::GREATER_THAN_QUERY: {
auto visitor =
std::make_unique<InvertedIndexVisitor<InvertedIndexQueryType::GREATER_THAN_QUERY>>(
- r.get(), nullptr, true);
+ io_ctx, r.get(), nullptr, true);
RETURN_IF_ERROR(construct_bkd_query_value(query_value, r,
visitor.get()));
*count = r->estimate_point_count(visitor.get());
break;
@@ -574,14 +580,14 @@ Status BkdIndexReader::invoke_bkd_try_query(const void*
query_value,
case InvertedIndexQueryType::GREATER_EQUAL_QUERY: {
auto visitor =
std::make_unique<InvertedIndexVisitor<InvertedIndexQueryType::GREATER_EQUAL_QUERY>>(
- r.get(), nullptr, true);
+ io_ctx, r.get(), nullptr, true);
RETURN_IF_ERROR(construct_bkd_query_value(query_value, r,
visitor.get()));
*count = r->estimate_point_count(visitor.get());
break;
}
case InvertedIndexQueryType::EQUAL_QUERY: {
auto visitor =
std::make_unique<InvertedIndexVisitor<InvertedIndexQueryType::EQUAL_QUERY>>(
- r.get(), nullptr, true);
+ io_ctx, r.get(), nullptr, true);
RETURN_IF_ERROR(construct_bkd_query_value(query_value, r,
visitor.get()));
*count = r->estimate_point_count(visitor.get());
break;
@@ -592,14 +598,15 @@ Status BkdIndexReader::invoke_bkd_try_query(const void*
query_value,
return Status::OK();
}
-Status BkdIndexReader::invoke_bkd_query(const void* query_value,
InvertedIndexQueryType query_type,
+Status BkdIndexReader::invoke_bkd_query(const io::IOContext* io_ctx, const
void* query_value,
+ InvertedIndexQueryType query_type,
std::shared_ptr<lucene::util::bkd::bkd_reader> r,
std::shared_ptr<roaring::Roaring>&
bit_map) {
switch (query_type) {
case InvertedIndexQueryType::LESS_THAN_QUERY: {
auto visitor =
std::make_unique<InvertedIndexVisitor<InvertedIndexQueryType::LESS_THAN_QUERY>>(
- r.get(), bit_map.get());
+ io_ctx, r.get(), bit_map.get());
RETURN_IF_ERROR(construct_bkd_query_value(query_value, r,
visitor.get()));
r->intersect(visitor.get());
break;
@@ -607,7 +614,7 @@ Status BkdIndexReader::invoke_bkd_query(const void*
query_value, InvertedIndexQu
case InvertedIndexQueryType::LESS_EQUAL_QUERY: {
auto visitor =
std::make_unique<InvertedIndexVisitor<InvertedIndexQueryType::LESS_EQUAL_QUERY>>(
- r.get(), bit_map.get());
+ io_ctx, r.get(), bit_map.get());
RETURN_IF_ERROR(construct_bkd_query_value(query_value, r,
visitor.get()));
r->intersect(visitor.get());
break;
@@ -615,7 +622,7 @@ Status BkdIndexReader::invoke_bkd_query(const void*
query_value, InvertedIndexQu
case InvertedIndexQueryType::GREATER_THAN_QUERY: {
auto visitor =
std::make_unique<InvertedIndexVisitor<InvertedIndexQueryType::GREATER_THAN_QUERY>>(
- r.get(), bit_map.get());
+ io_ctx, r.get(), bit_map.get());
RETURN_IF_ERROR(construct_bkd_query_value(query_value, r,
visitor.get()));
r->intersect(visitor.get());
break;
@@ -623,14 +630,14 @@ Status BkdIndexReader::invoke_bkd_query(const void*
query_value, InvertedIndexQu
case InvertedIndexQueryType::GREATER_EQUAL_QUERY: {
auto visitor =
std::make_unique<InvertedIndexVisitor<InvertedIndexQueryType::GREATER_EQUAL_QUERY>>(
- r.get(), bit_map.get());
+ io_ctx, r.get(), bit_map.get());
RETURN_IF_ERROR(construct_bkd_query_value(query_value, r,
visitor.get()));
r->intersect(visitor.get());
break;
}
case InvertedIndexQueryType::EQUAL_QUERY: {
auto visitor =
std::make_unique<InvertedIndexVisitor<InvertedIndexQueryType::EQUAL_QUERY>>(
- r.get(), bit_map.get());
+ io_ctx, r.get(), bit_map.get());
RETURN_IF_ERROR(construct_bkd_query_value(query_value, r,
visitor.get()));
r->intersect(visitor.get());
break;
@@ -668,7 +675,7 @@ Status BkdIndexReader::try_query(const io::IOContext*
io_ctx, OlapReaderStatisti
return Status::OK();
}
- return invoke_bkd_try_query(query_value, query_type, r, count);
+ return invoke_bkd_try_query(io_ctx, query_value, query_type, r, count);
} catch (const CLuceneError& e) {
return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
"BKD Query CLuceneError Occurred, error msg: {}", e.what());
@@ -706,7 +713,7 @@ Status BkdIndexReader::query(const io::IOContext* io_ctx,
OlapReaderStatistics*
return Status::OK();
}
- RETURN_IF_ERROR(invoke_bkd_query(query_value, query_type, r, bit_map));
+ RETURN_IF_ERROR(invoke_bkd_query(io_ctx, query_value, query_type, r,
bit_map));
bit_map->runOptimize();
cache->insert(cache_key, bit_map, &cache_handler);
@@ -751,9 +758,9 @@ InvertedIndexReaderType BkdIndexReader::type() {
}
template <InvertedIndexQueryType QT>
-InvertedIndexVisitor<QT>::InvertedIndexVisitor(lucene::util::bkd::bkd_reader*
r,
+InvertedIndexVisitor<QT>::InvertedIndexVisitor(const void* io_ctx,
lucene::util::bkd::bkd_reader* r,
roaring::Roaring* h, bool
only_count)
- : _hits(h), _num_hits(0), _only_count(only_count), _reader(r) {}
+ : _io_ctx(io_ctx), _hits(h), _num_hits(0), _only_count(only_count),
_reader(r) {}
template <InvertedIndexQueryType QT>
int InvertedIndexVisitor<QT>::matches(uint8_t* packed_value) {
@@ -1156,8 +1163,24 @@ Status InvertedIndexIterator::read_from_inverted_index(
}
}
- RETURN_IF_ERROR(_reader->query(&_io_ctx, _stats, _runtime_state,
column_name, query_value,
- query_type, bit_map));
+ auto execute_query = [&]() {
+ return _reader->query(&_io_ctx, _stats, _runtime_state, column_name,
query_value,
+ query_type, bit_map);
+ };
+
+ if (_runtime_state->query_options().enable_profile) {
+ InvertedIndexQueryStatistics query_stats;
+ {
+ SCOPED_RAW_TIMER(&query_stats.exec_time);
+ RETURN_IF_ERROR(execute_query());
+ }
+ query_stats.column_name = column_name;
+ query_stats.hit_rows = bit_map->cardinality();
+ _stats->inverted_index_stats.stats.emplace_back(query_stats);
+ } else {
+ RETURN_IF_ERROR(execute_query());
+ }
+
return Status::OK();
}
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.h
b/be/src/olap/rowset/segment_v2/inverted_index_reader.h
index 390928493f6..410e5f32e57 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.h
@@ -299,6 +299,7 @@ public:
template <InvertedIndexQueryType QT>
class InvertedIndexVisitor : public
lucene::util::bkd::bkd_reader::intersect_visitor {
private:
+ const void* _io_ctx = nullptr;
roaring::Roaring* _hits = nullptr;
uint32_t _num_hits;
bool _only_count;
@@ -309,8 +310,8 @@ public:
std::string query_max;
public:
- InvertedIndexVisitor(lucene::util::bkd::bkd_reader* r, roaring::Roaring*
hits,
- bool only_count = false);
+ InvertedIndexVisitor(const void* io_ctx, lucene::util::bkd::bkd_reader* r,
+ roaring::Roaring* hits, bool only_count = false);
~InvertedIndexVisitor() override = default;
void set_reader(lucene::util::bkd::bkd_reader* r) { _reader = r; }
@@ -329,6 +330,7 @@ public:
std::vector<uint8_t>& max_packed)
override;
lucene::util::bkd::relation compare_prefix(std::vector<uint8_t>& prefix)
override;
uint32_t get_num_hits() const { return _num_hits; }
+ const void* get_io_context() override { return _io_ctx; }
};
class BkdIndexReader : public InvertedIndexReader {
@@ -351,9 +353,11 @@ public:
Status try_query(const io::IOContext* io_ctx, OlapReaderStatistics* stats,
const std::string& column_name, const void* query_value,
InvertedIndexQueryType query_type, uint32_t* count)
override;
- Status invoke_bkd_try_query(const void* query_value,
InvertedIndexQueryType query_type,
+ Status invoke_bkd_try_query(const io::IOContext* io_ctx, const void*
query_value,
+ InvertedIndexQueryType query_type,
std::shared_ptr<lucene::util::bkd::bkd_reader>
r, uint32_t* count);
- Status invoke_bkd_query(const void* query_value, InvertedIndexQueryType
query_type,
+ Status invoke_bkd_query(const io::IOContext* io_ctx, const void*
query_value,
+ InvertedIndexQueryType query_type,
std::shared_ptr<lucene::util::bkd::bkd_reader> r,
std::shared_ptr<roaring::Roaring>& bit_map);
template <InvertedIndexQueryType QT>
diff --git a/be/src/pipeline/exec/olap_scan_operator.cpp
b/be/src/pipeline/exec/olap_scan_operator.cpp
index 1434a16184e..53a7d8cd0f3 100644
--- a/be/src/pipeline/exec/olap_scan_operator.cpp
+++ b/be/src/pipeline/exec/olap_scan_operator.cpp
@@ -174,6 +174,10 @@ Status OlapScanLocalState::_init_profile() {
ADD_TIMER(_segment_profile, "InvertedIndexSearcherOpenTime");
_inverted_index_searcher_search_timer =
ADD_TIMER(_segment_profile, "InvertedIndexSearcherSearchTime");
+ _inverted_index_searcher_search_init_timer =
+ ADD_TIMER(_segment_profile, "InvertedIndexSearcherSearchInitTime");
+ _inverted_index_searcher_search_exec_timer =
+ ADD_TIMER(_segment_profile, "InvertedIndexSearcherSearchExecTime");
_inverted_index_searcher_cache_hit_counter =
ADD_COUNTER(_segment_profile, "InvertedIndexSearcherCacheHit",
TUnit::UNIT);
_inverted_index_searcher_cache_miss_counter =
@@ -228,6 +232,10 @@ Status OlapScanLocalState::_init_profile() {
_segment_create_column_readers_timer =
ADD_TIMER(_scanner_profile, "SegmentCreateColumnReadersTimer");
_segment_load_index_timer = ADD_TIMER(_scanner_profile,
"SegmentLoadIndexTimer");
+
+ _index_filter_profile = std::make_unique<RuntimeProfile>("IndexFilter");
+ _scanner_profile->add_child(_index_filter_profile.get(), true, nullptr);
+
return Status::OK();
}
diff --git a/be/src/pipeline/exec/olap_scan_operator.h
b/be/src/pipeline/exec/olap_scan_operator.h
index e6bc345ffd6..24a1b1b876a 100644
--- a/be/src/pipeline/exec/olap_scan_operator.h
+++ b/be/src/pipeline/exec/olap_scan_operator.h
@@ -98,6 +98,7 @@ private:
std::set<int32_t> _maybe_read_column_ids;
std::unique_ptr<RuntimeProfile> _segment_profile;
+ std::unique_ptr<RuntimeProfile> _index_filter_profile;
RuntimeProfile::Counter* _tablet_counter = nullptr;
RuntimeProfile::Counter* _key_range_counter = nullptr;
@@ -184,6 +185,8 @@ private:
RuntimeProfile::Counter* _inverted_index_query_bitmap_copy_timer = nullptr;
RuntimeProfile::Counter* _inverted_index_searcher_open_timer = nullptr;
RuntimeProfile::Counter* _inverted_index_searcher_search_timer = nullptr;
+ RuntimeProfile::Counter* _inverted_index_searcher_search_init_timer =
nullptr;
+ RuntimeProfile::Counter* _inverted_index_searcher_search_exec_timer =
nullptr;
RuntimeProfile::Counter* _inverted_index_searcher_cache_hit_counter =
nullptr;
RuntimeProfile::Counter* _inverted_index_searcher_cache_miss_counter =
nullptr;
RuntimeProfile::Counter* _inverted_index_downgrade_count_counter = nullptr;
diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp
b/be/src/vec/exec/scan/new_olap_scanner.cpp
index 0970ded03e9..e9c199074ec 100644
--- a/be/src/vec/exec/scan/new_olap_scanner.cpp
+++ b/be/src/vec/exec/scan/new_olap_scanner.cpp
@@ -41,6 +41,7 @@
#include "exprs/function_filter.h"
#include "io/cache/block_file_cache_profile.h"
#include "io/io_common.h"
+#include "olap/inverted_index_profile.h"
#include "olap/olap_common.h"
#include "olap/olap_tuple.h"
#include "olap/rowset/rowset.h"
@@ -634,12 +635,19 @@ void NewOlapScanner::_collect_profile_before_close() {
stats.inverted_index_searcher_open_timer);
\
COUNTER_UPDATE(Parent->_inverted_index_searcher_search_timer,
\
stats.inverted_index_searcher_search_timer);
\
+ COUNTER_UPDATE(Parent->_inverted_index_searcher_search_init_timer,
\
+ stats.inverted_index_searcher_search_init_timer);
\
+ COUNTER_UPDATE(Parent->_inverted_index_searcher_search_exec_timer,
\
+ stats.inverted_index_searcher_search_exec_timer);
\
COUNTER_UPDATE(Parent->_inverted_index_searcher_cache_hit_counter,
\
stats.inverted_index_searcher_cache_hit);
\
COUNTER_UPDATE(Parent->_inverted_index_searcher_cache_miss_counter,
\
stats.inverted_index_searcher_cache_miss);
\
COUNTER_UPDATE(Parent->_inverted_index_downgrade_count_counter,
\
stats.inverted_index_downgrade_count);
\
+ InvertedIndexProfileReporter inverted_index_profile;
\
+ inverted_index_profile.update(Parent->_index_filter_profile.get(),
\
+ &stats.inverted_index_stats);
\
if (config::enable_file_cache) {
\
io::FileCacheProfileReporter
cache_profile(Parent->_segment_profile.get()); \
cache_profile.update(&stats.file_cache_stats);
\
diff --git a/be/test/olap/inverted_index_profile_test.cpp
b/be/test/olap/inverted_index_profile_test.cpp
new file mode 100644
index 00000000000..25dc63588c7
--- /dev/null
+++ b/be/test/olap/inverted_index_profile_test.cpp
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/inverted_index_profile.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+
+#include "olap/inverted_index_stats.h"
+
+namespace doris {
+
+TEST(InvertedIndexProfileReporterTest, UpdateTest) {
+ auto runtime_profile = std::make_unique<RuntimeProfile>("test_profile");
+
+ InvertedIndexStatistics statistics;
+ statistics.stats.push_back({"test_column1", 101, 201});
+ statistics.stats.push_back({"test_column2", 102, 202});
+
+ InvertedIndexProfileReporter reporter;
+ reporter.update(runtime_profile.get(), &statistics);
+
+ ASSERT_EQ(runtime_profile->get_counter("HitRows_test_column1")->value(),
101);
+ ASSERT_EQ(runtime_profile->get_counter("ExecTime_test_column1")->value(),
201);
+ ASSERT_EQ(runtime_profile->get_counter("HitRows_test_column2")->value(),
102);
+ ASSERT_EQ(runtime_profile->get_counter("ExecTime_test_column2")->value(),
202);
+}
+
+} // namespace doris
\ No newline at end of file
diff --git
a/be/test/olap/rowset/segment_v2/inverted_index/compaction/util/index_compaction_utils.cpp
b/be/test/olap/rowset/segment_v2/inverted_index/compaction/util/index_compaction_utils.cpp
index 02353fc5441..ae318571bbf 100644
---
a/be/test/olap/rowset/segment_v2/inverted_index/compaction/util/index_compaction_utils.cpp
+++
b/be/test/olap/rowset/segment_v2/inverted_index/compaction/util/index_compaction_utils.cpp
@@ -169,7 +169,7 @@ class IndexCompactionUtils {
.ok());
auto result = std::make_shared<roaring::Roaring>();
EXPECT_TRUE(idx_reader
- ->invoke_bkd_query(query_param->get_value(),
+ ->invoke_bkd_query(nullptr,
query_param->get_value(),
InvertedIndexQueryType::EQUAL_QUERY,
*bkd_searcher, result)
.ok());
diff --git
a/be/test/olap/rowset/segment_v2/inverted_index_fs_directory_test.cpp
b/be/test/olap/rowset/segment_v2/inverted_index_fs_directory_test.cpp
new file mode 100644
index 00000000000..fa2145544af
--- /dev/null
+++ b/be/test/olap/rowset/segment_v2/inverted_index_fs_directory_test.cpp
@@ -0,0 +1,102 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/rowset/segment_v2/inverted_index_fs_directory.h"
+
+#include <gtest/gtest.h>
+
+#include <filesystem>
+#include <fstream>
+#include <memory>
+
+#include "common/config.h"
+#include "io/fs/file_system.h"
+#include "io/fs/local_file_system.h"
+
+namespace doris::segment_v2 {
+
+class DorisFSDirectoryTest : public ::testing::Test {
+protected:
+ void SetUp() override {
+ // Enable debug points for testing
+ _original_enable_debug_points = config::enable_debug_points;
+ config::enable_debug_points = true;
+
+ _tmp_dir = std::filesystem::temp_directory_path() /
"doris_fs_directory_test";
+ std::filesystem::remove_all(_tmp_dir);
+ std::filesystem::create_directories(_tmp_dir);
+ _fs = io::global_local_filesystem();
+ _directory = std::make_unique<DorisFSDirectory>();
+ _directory->init(_fs, _tmp_dir.string().c_str());
+ }
+
+ void TearDown() override {
+ _directory.reset();
+ std::filesystem::remove_all(_tmp_dir);
+ config::enable_debug_points = _original_enable_debug_points;
+ }
+
+ std::filesystem::path _tmp_dir;
+ io::FileSystemSPtr _fs;
+ std::unique_ptr<DorisFSDirectory> _directory;
+ bool _original_enable_debug_points;
+};
+
+TEST_F(DorisFSDirectoryTest, FSIndexInputReadInternalTimer) {
+ std::string file_name = "test_timer_file";
+ std::filesystem::path test_file = _tmp_dir / file_name;
+ std::ofstream ofs(test_file);
+ std::string content = "some test content for timer";
+ ofs << content;
+ ofs.close();
+
+ lucene::store::IndexInput* input1 = nullptr;
+ CLuceneError error;
+ bool result =
+ DorisFSDirectory::FSIndexInput::open(_fs,
test_file.string().c_str(), input1, error);
+ EXPECT_TRUE(result);
+ ASSERT_NE(input1, nullptr);
+
+ auto* fs_input1 = dynamic_cast<DorisFSDirectory::FSIndexInput*>(input1);
+ ASSERT_NE(fs_input1, nullptr);
+
+ io::FileCacheStatistics stats;
+ fs_input1->_io_ctx.file_cache_stats = &stats;
+
+ auto* input2 = fs_input1->clone();
+ auto* fs_input2 = dynamic_cast<DorisFSDirectory::FSIndexInput*>(input2);
+ ASSERT_NE(fs_input2, nullptr);
+
+ fs_input2->_io_ctx.file_cache_stats = &stats;
+
+ uint8_t buffer1[10];
+ input1->readBytes(buffer1, 10);
+ EXPECT_GT(stats.inverted_index_io_timer, 0);
+ int64_t old_time = stats.inverted_index_io_timer;
+
+ std::this_thread::sleep_for(std::chrono::milliseconds(100));
+
+ input2->seek(0);
+ uint8_t buffer2[10];
+ input2->readBytes(buffer2, 10);
+ EXPECT_GT(stats.inverted_index_io_timer, old_time);
+
+ _CLDELETE(input2);
+ _CLDELETE(input1);
+}
+
+} // namespace doris::segment_v2
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]