This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new cd3e1dce74f [feature](inverted index) Add profile statistics for each
condition in inverted index filters (#48459)
cd3e1dce74f is described below
commit cd3e1dce74fa3a4f4e271e2af56b1754f1e147ec
Author: zzzxl <[email protected]>
AuthorDate: Sat Mar 1 11:00:19 2025 +0800
[feature](inverted index) Add profile statistics for each condition in
inverted index filters (#48459)
https://github.com/apache/doris/pull/47504
---
be/src/olap/inverted_index_profile.h | 57 ++++++++++++++++++++++
be/src/olap/inverted_index_stats.h | 34 +++++++++++++
be/src/olap/olap_common.h | 2 +
.../rowset/segment_v2/inverted_index_reader.cpp | 20 +++++++-
be/src/pipeline/exec/olap_scan_operator.cpp | 4 ++
be/src/pipeline/exec/olap_scan_operator.h | 1 +
be/src/vec/exec/scan/new_olap_scan_node.cpp | 4 ++
be/src/vec/exec/scan/new_olap_scan_node.h | 1 +
be/src/vec/exec/scan/new_olap_scanner.cpp | 4 ++
be/test/olap/inverted_index_profile_test.cpp | 44 +++++++++++++++++
10 files changed, 169 insertions(+), 2 deletions(-)
diff --git a/be/src/olap/inverted_index_profile.h
b/be/src/olap/inverted_index_profile.h
new file mode 100644
index 00000000000..9255e41dc55
--- /dev/null
+++ b/be/src/olap/inverted_index_profile.h
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "olap/inverted_index_stats.h"
+#include "util/runtime_profile.h"
+
+namespace doris {
+
+// Publishes per-condition inverted index filter statistics into a RuntimeProfile.
+//
+// For every reported entry two child counters are updated, both named after the
+// entry's column:
+//   - "fr_<column>" (TUnit::UNIT)    under the "HitRows" parent, updated with hit_rows
+//   - "ft_<column>" (TUnit::TIME_NS) under the "ExecTime" parent, updated with exec_time
+class InvertedIndexProfileReporter {
+public:
+    InvertedIndexProfileReporter() = default;
+    ~InvertedIndexProfileReporter() = default;
+
+    // Adds the leading entries of `statistics->stats` (at most
+    // `max_reported_conditions` of them) to `profile`.
+    //
+    // profile:    profile that receives the counters; the call is a no-op when null.
+    // statistics: statistics to publish; the call is a no-op when null or empty.
+    void update(RuntimeProfile* profile, const InvertedIndexStatistics* statistics) {
+        // Both pointers are dereferenced below, so reject null input up front.
+        if (profile == nullptr || statistics == nullptr) {
+            return;
+        }
+
+        // Iteration limit: the smaller of the cap and the number of recorded entries.
+        const size_t iteration_limit =
+                std::min<size_t>(max_reported_conditions, statistics->stats.size());
+        if (iteration_limit == 0) {
+            // Nothing to report; leave the profile untouched (no parent counters either).
+            return;
+        }
+
+        // The parent counters do not depend on the entry being reported, so they
+        // are registered once per call instead of once per loop iteration.
+        // NOTE(review): "HitRows" is registered through the timer macro although
+        // its children count rows -- confirm this is intended.
+        ADD_TIMER_WITH_LEVEL(profile, hit_rows_name, 1);
+        ADD_TIMER_WITH_LEVEL(profile, exec_time_name, 1);
+
+        for (size_t i = 0; i < iteration_limit; ++i) {
+            const auto& stats = statistics->stats[i];
+
+            auto* hit_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "fr_" + stats.column_name,
+                                                          TUnit::UNIT, hit_rows_name, 1);
+            COUNTER_UPDATE(hit_rows, stats.hit_rows);
+
+            auto* exec_time = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "ft_" + stats.column_name,
+                                                           TUnit::TIME_NS, exec_time_name, 1);
+            COUNTER_UPDATE(exec_time, stats.exec_time);
+        }
+    }
+
+private:
+    // Upper bound on the number of statistics entries reported per update() call.
+    static constexpr size_t max_reported_conditions = 20;
+    // Names of the parent counters that group the per-column child counters.
+    static constexpr const char* hit_rows_name = "HitRows";
+    static constexpr const char* exec_time_name = "ExecTime";
+};
+
+} // namespace doris
diff --git a/be/src/olap/inverted_index_stats.h
b/be/src/olap/inverted_index_stats.h
new file mode 100644
index 00000000000..b82b230f41d
--- /dev/null
+++ b/be/src/olap/inverted_index_stats.h
@@ -0,0 +1,34 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+namespace doris {
+
+// Statistics recorded for one inverted index query condition.
+struct InvertedIndexQueryStatistics {
+    // Name of the column the condition was evaluated against.
+    std::string column_name;
+    // Number of rows matched by the condition; defaults to zero.
+    int64_t hit_rows {0};
+    // Time spent evaluating the condition; defaults to zero.
+    int64_t exec_time {0};
+};
+
+// Collection of per-condition statistics, one element per recorded query.
+struct InvertedIndexStatistics {
+    std::vector<InvertedIndexQueryStatistics> stats;
+};
+
+} // namespace doris
diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h
index 77d5228de41..044b7eb45d0 100644
--- a/be/src/olap/olap_common.h
+++ b/be/src/olap/olap_common.h
@@ -36,6 +36,7 @@
#include "common/config.h"
#include "io/io_common.h"
+#include "olap/inverted_index_stats.h"
#include "olap/olap_define.h"
#include "olap/rowset/rowset_fwd.h"
#include "util/hash_util.hpp"
@@ -377,6 +378,7 @@ struct OlapReaderStatistics {
int64_t inverted_index_searcher_search_timer = 0;
int64_t inverted_index_searcher_cache_hit = 0;
int64_t inverted_index_searcher_cache_miss = 0;
+ InvertedIndexStatistics inverted_index_stats;
int64_t output_index_result_column_timer = 0;
// number of segment filtered by column stat when creating seg iterator
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
index 1a20f84a1bd..7281c3a6fe2 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
@@ -1220,8 +1220,24 @@ Status InvertedIndexIterator::read_from_inverted_index(
}
}
- RETURN_IF_ERROR(
- _reader->query(_stats, _runtime_state, column_name, query_value,
query_type, bit_map));
+ auto execute_query = [&]() {
+ return _reader->query(_stats, _runtime_state, column_name,
query_value, query_type,
+ bit_map);
+ };
+
+ if (_runtime_state->query_options().enable_profile) {
+ InvertedIndexQueryStatistics query_stats;
+ {
+ SCOPED_RAW_TIMER(&query_stats.exec_time);
+ RETURN_IF_ERROR(execute_query());
+ }
+ query_stats.column_name = column_name;
+ query_stats.hit_rows = bit_map->cardinality();
+ _stats->inverted_index_stats.stats.emplace_back(query_stats);
+ } else {
+ RETURN_IF_ERROR(execute_query());
+ }
+
return Status::OK();
}
diff --git a/be/src/pipeline/exec/olap_scan_operator.cpp
b/be/src/pipeline/exec/olap_scan_operator.cpp
index aa7413b4a16..63735161c56 100644
--- a/be/src/pipeline/exec/olap_scan_operator.cpp
+++ b/be/src/pipeline/exec/olap_scan_operator.cpp
@@ -190,6 +190,10 @@ Status OlapScanLocalState::_init_profile() {
_segment_create_column_readers_timer =
ADD_TIMER(_scanner_profile, "SegmentCreateColumnReadersTimer");
_segment_load_index_timer = ADD_TIMER(_scanner_profile,
"SegmentLoadIndexTimer");
+
+ _index_filter_profile = std::make_unique<RuntimeProfile>("IndexFilter");
+ _scanner_profile->add_child(_index_filter_profile.get(), true, nullptr);
+
return Status::OK();
}
diff --git a/be/src/pipeline/exec/olap_scan_operator.h
b/be/src/pipeline/exec/olap_scan_operator.h
index de00cd4f37a..37c460a5756 100644
--- a/be/src/pipeline/exec/olap_scan_operator.h
+++ b/be/src/pipeline/exec/olap_scan_operator.h
@@ -97,6 +97,7 @@ private:
std::set<int32_t> _maybe_read_column_ids;
std::unique_ptr<RuntimeProfile> _segment_profile;
+ std::unique_ptr<RuntimeProfile> _index_filter_profile;
RuntimeProfile::Counter* _num_disks_accessed_counter = nullptr;
diff --git a/be/src/vec/exec/scan/new_olap_scan_node.cpp
b/be/src/vec/exec/scan/new_olap_scan_node.cpp
index 16236b96e23..4415960ff30 100644
--- a/be/src/vec/exec/scan/new_olap_scan_node.cpp
+++ b/be/src/vec/exec/scan/new_olap_scan_node.cpp
@@ -195,6 +195,10 @@ Status NewOlapScanNode::_init_profile() {
_total_segment_counter = ADD_COUNTER(_segment_profile, "NumSegmentTotal",
TUnit::UNIT);
_runtime_filter_info = ADD_LABEL_COUNTER_WITH_LEVEL(_runtime_profile,
"RuntimeFilterInfo", 1);
+
+ _index_filter_profile = std::make_unique<RuntimeProfile>("IndexFilter");
+ _scanner_profile->add_child(_index_filter_profile.get(), true, nullptr);
+
return Status::OK();
}
diff --git a/be/src/vec/exec/scan/new_olap_scan_node.h
b/be/src/vec/exec/scan/new_olap_scan_node.h
index fd634dbdae4..dfa1842e77f 100644
--- a/be/src/vec/exec/scan/new_olap_scan_node.h
+++ b/be/src/vec/exec/scan/new_olap_scan_node.h
@@ -122,6 +122,7 @@ private:
private:
std::unique_ptr<RuntimeProfile> _segment_profile;
+ std::unique_ptr<RuntimeProfile> _index_filter_profile;
RuntimeProfile::Counter* _num_disks_accessed_counter = nullptr;
diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp
b/be/src/vec/exec/scan/new_olap_scanner.cpp
index 6ffb60f425b..237ca738f4d 100644
--- a/be/src/vec/exec/scan/new_olap_scanner.cpp
+++ b/be/src/vec/exec/scan/new_olap_scanner.cpp
@@ -37,6 +37,7 @@
#include "exprs/function_filter.h"
#include "io/cache/block/block_file_cache_profile.h"
#include "io/io_common.h"
+#include "olap/inverted_index_profile.h"
#include "olap/olap_common.h"
#include "olap/olap_tuple.h"
#include "olap/rowset/rowset.h"
@@ -638,6 +639,9 @@ void NewOlapScanner::_collect_profile_before_close() {
stats.inverted_index_searcher_cache_hit);
\
COUNTER_UPDATE(Parent->_inverted_index_searcher_cache_miss_counter,
\
stats.inverted_index_searcher_cache_miss);
\
+ InvertedIndexProfileReporter inverted_index_profile;
\
+ inverted_index_profile.update(Parent->_index_filter_profile.get(),
\
+ &stats.inverted_index_stats);
\
if (config::enable_file_cache) {
\
io::FileCacheProfileReporter
cache_profile(Parent->_segment_profile.get()); \
cache_profile.update(&stats.file_cache_stats);
\
diff --git a/be/test/olap/inverted_index_profile_test.cpp
b/be/test/olap/inverted_index_profile_test.cpp
new file mode 100644
index 00000000000..e3aa3555604
--- /dev/null
+++ b/be/test/olap/inverted_index_profile_test.cpp
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/inverted_index_profile.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+
+#include "olap/inverted_index_stats.h"
+
+namespace doris {
+
+// Verifies that update() publishes one "fr_<column>" counter (hit rows) and one
+// "ft_<column>" counter (execution time) for every statistics entry.
+TEST(InvertedIndexProfileReporterTest, UpdateTest) {
+    auto runtime_profile = std::make_unique<RuntimeProfile>("test_profile");
+
+    InvertedIndexStatistics statistics;
+    statistics.stats.push_back({"test_column1", 101, 201});
+    statistics.stats.push_back({"test_column2", 102, 202});
+
+    InvertedIndexProfileReporter reporter;
+    reporter.update(runtime_profile.get(), &statistics);
+
+    // Check the lookup result before dereferencing it, so that a counter that
+    // was not published shows up as a test failure rather than a null dereference.
+    auto expect_counter = [&](const std::string& name, int64_t expected) {
+        auto* counter = runtime_profile->get_counter(name);
+        ASSERT_NE(counter, nullptr) << "missing counter: " << name;
+        ASSERT_EQ(counter->value(), expected) << "unexpected value for counter: " << name;
+    };
+
+    expect_counter("fr_test_column1", 101);
+    expect_counter("ft_test_column1", 201);
+    expect_counter("fr_test_column2", 102);
+    expect_counter("ft_test_column2", 202);
+}
+
+} // namespace doris
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]