This is an automated email from the ASF dual-hosted git repository.
jianliangqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 198f5329f8d [feature](inverted index) Add profile statistics for each
condition in inverted index filters (#47504)
198f5329f8d is described below
commit 198f5329f8d2413c6910df56ffe712c3fe48b3de
Author: zzzxl <[email protected]>
AuthorDate: Fri Feb 21 10:21:22 2025 +0800
[feature](inverted index) Add profile statistics for each condition in
inverted index filters (#47504)
Problem Summary:
select count() from httplogs where clientip match '232.71.0.0' and
request match 'images';
IndexFilter:
- HitRows: 0ns
- fr_clientip: 10.392K (10392)
- fr_request: 28.601172M (28601172)
- ExecTime: 0ns
- ft_clientip: 2.65ms
- ft_request: 201.778ms
FilteredRows (fr_<column>, listed under HitRows): the number of rows that
satisfied the condition on that column after index filtering.
FilteredTime (ft_<column>, listed under ExecTime): the time taken to run the
index filtering for that condition.
---
be/src/olap/inverted_index_profile.h | 57 ++++++++++++++++++++++
be/src/olap/inverted_index_stats.h | 34 +++++++++++++
be/src/olap/olap_common.h | 2 +
.../rowset/segment_v2/inverted_index_reader.cpp | 20 +++++++-
be/src/pipeline/exec/olap_scan_operator.cpp | 4 ++
be/src/pipeline/exec/olap_scan_operator.h | 1 +
be/src/vec/exec/scan/new_olap_scanner.cpp | 6 +++
be/test/olap/inverted_index_profile_test.cpp | 44 +++++++++++++++++
8 files changed, 166 insertions(+), 2 deletions(-)
diff --git a/be/src/olap/inverted_index_profile.h
b/be/src/olap/inverted_index_profile.h
new file mode 100644
index 00000000000..9255e41dc55
--- /dev/null
+++ b/be/src/olap/inverted_index_profile.h
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <cstddef>
+#include <string>
+#include <vector>
+
+#include "olap/inverted_index_stats.h"
+#include "util/runtime_profile.h"
+
+namespace doris {
+
+// Publishes per-condition inverted index statistics into a RuntimeProfile.
+//
+// For every reported InvertedIndexQueryStatistics entry two child counters are
+// added to the profile:
+//   - "fr_<column_name>" (TUnit::UNIT) under the "HitRows" parent, set from hit_rows
+//   - "ft_<column_name>" (TUnit::TIME_NS) under the "ExecTime" parent, set from exec_time
+class InvertedIndexProfileReporter {
+public:
+    InvertedIndexProfileReporter() = default;
+    ~InvertedIndexProfileReporter() = default;
+
+    // Reports at most `max_reported_conditions` entries of `statistics->stats`,
+    // in vector order, into `profile`.
+    //
+    // Does nothing when either pointer is null or when there are no entries, so
+    // the "HitRows"/"ExecTime" parents are only created when at least one
+    // condition is reported.
+    void update(RuntimeProfile* profile, const InvertedIndexStatistics* statistics) {
+        if (profile == nullptr || statistics == nullptr || statistics->stats.empty()) {
+            return;
+        }
+
+        // The parent counters do not depend on the entry being reported, so they
+        // are registered once up front instead of once per iteration.
+        // NOTE(review): "HitRows" is registered through the timer macro, which is
+        // presumably why the parent line is rendered as "0ns" in the profile; it is
+        // kept as-is here to leave the existing profile output unchanged.
+        ADD_TIMER_WITH_LEVEL(profile, hit_rows_name, 1);
+        ADD_TIMER_WITH_LEVEL(profile, exec_time_name, 1);
+
+        const size_t iteration_limit =
+                std::min<size_t>(max_reported_conditions, statistics->stats.size());
+
+        for (size_t i = 0; i < iteration_limit; ++i) {
+            const auto& stats = statistics->stats[i];
+
+            auto* hit_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "fr_" + stats.column_name,
+                                                          TUnit::UNIT, hit_rows_name, 1);
+            COUNTER_UPDATE(hit_rows, stats.hit_rows);
+
+            auto* exec_time = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "ft_" + stats.column_name,
+                                                           TUnit::TIME_NS, exec_time_name, 1);
+            COUNTER_UPDATE(exec_time, stats.exec_time);
+        }
+    }
+
+private:
+    // Upper bound on the number of entries published by a single update() call.
+    static constexpr size_t max_reported_conditions = 20;
+
+    // Names of the parent counters that group the per-column children.
+    static constexpr const char* hit_rows_name = "HitRows";
+    static constexpr const char* exec_time_name = "ExecTime";
+};
+
+} // namespace doris
diff --git a/be/src/olap/inverted_index_stats.h
b/be/src/olap/inverted_index_stats.h
new file mode 100644
index 00000000000..b82b230f41d
--- /dev/null
+++ b/be/src/olap/inverted_index_stats.h
@@ -0,0 +1,34 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+namespace doris {
+
+// Statistics recorded for a single inverted index query on one column.
+// Member order is part of the interface: callers aggregate-initialize this
+// struct as {column_name, hit_rows, exec_time}.
+struct InvertedIndexQueryStatistics {
+    // Name of the column the query was evaluated against; used as the suffix of
+    // the "fr_"/"ft_" profile counters.
+    std::string column_name;
+    // Cardinality of the result bitmap after the query.
+    int64_t hit_rows = 0;
+    // Time spent executing the query; reported in the profile as TUnit::TIME_NS.
+    int64_t exec_time = 0;
+};
+
+// Collection of per-query statistics, one entry appended per inverted index
+// query when profiling is enabled.
+struct InvertedIndexStatistics {
+    std::vector<InvertedIndexQueryStatistics> stats;
+};
+
+} // namespace doris
diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h
index 623d2c83e49..24477b9b66b 100644
--- a/be/src/olap/olap_common.h
+++ b/be/src/olap/olap_common.h
@@ -38,6 +38,7 @@
#include "common/config.h"
#include "common/exception.h"
#include "io/io_common.h"
+#include "olap/inverted_index_stats.h"
#include "olap/olap_define.h"
#include "olap/rowset/rowset_fwd.h"
#include "util/hash_util.hpp"
@@ -378,6 +379,7 @@ struct OlapReaderStatistics {
int64_t inverted_index_searcher_cache_hit = 0;
int64_t inverted_index_searcher_cache_miss = 0;
int64_t inverted_index_downgrade_count = 0;
+ InvertedIndexStatistics inverted_index_stats;
int64_t output_index_result_column_timer = 0;
// number of segment filtered by column stat when creating seg iterator
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
index 5da74fd1dcf..c885072ee16 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
@@ -1177,8 +1177,24 @@ Status InvertedIndexIterator::read_from_inverted_index(
}
}
- RETURN_IF_ERROR(_reader->query(&_io_ctx, _stats, _runtime_state,
column_name, query_value,
- query_type, bit_map));
+ auto execute_query = [&]() {
+ return _reader->query(&_io_ctx, _stats, _runtime_state, column_name,
query_value,
+ query_type, bit_map);
+ };
+
+ if (_runtime_state->query_options().enable_profile) {
+ InvertedIndexQueryStatistics query_stats;
+ {
+ SCOPED_RAW_TIMER(&query_stats.exec_time);
+ RETURN_IF_ERROR(execute_query());
+ }
+ query_stats.column_name = column_name;
+ query_stats.hit_rows = bit_map->cardinality();
+ _stats->inverted_index_stats.stats.emplace_back(query_stats);
+ } else {
+ RETURN_IF_ERROR(execute_query());
+ }
+
return Status::OK();
}
diff --git a/be/src/pipeline/exec/olap_scan_operator.cpp
b/be/src/pipeline/exec/olap_scan_operator.cpp
index 37c7664358d..604bf920045 100644
--- a/be/src/pipeline/exec/olap_scan_operator.cpp
+++ b/be/src/pipeline/exec/olap_scan_operator.cpp
@@ -196,6 +196,10 @@ Status OlapScanLocalState::_init_profile() {
_segment_create_column_readers_timer =
ADD_TIMER(_scanner_profile, "SegmentCreateColumnReadersTimer");
_segment_load_index_timer = ADD_TIMER(_scanner_profile,
"SegmentLoadIndexTimer");
+
+ _index_filter_profile = std::make_unique<RuntimeProfile>("IndexFilter");
+ _scanner_profile->add_child(_index_filter_profile.get(), true, nullptr);
+
return Status::OK();
}
diff --git a/be/src/pipeline/exec/olap_scan_operator.h
b/be/src/pipeline/exec/olap_scan_operator.h
index 7efe357fe3b..347c29e9d43 100644
--- a/be/src/pipeline/exec/olap_scan_operator.h
+++ b/be/src/pipeline/exec/olap_scan_operator.h
@@ -99,6 +99,7 @@ private:
std::set<int32_t> _maybe_read_column_ids;
std::unique_ptr<RuntimeProfile> _segment_profile;
+ std::unique_ptr<RuntimeProfile> _index_filter_profile;
RuntimeProfile::Counter* _tablet_counter = nullptr;
RuntimeProfile::Counter* _key_range_counter = nullptr;
diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp
b/be/src/vec/exec/scan/new_olap_scanner.cpp
index 248d391aadd..d877a064e90 100644
--- a/be/src/vec/exec/scan/new_olap_scanner.cpp
+++ b/be/src/vec/exec/scan/new_olap_scanner.cpp
@@ -41,6 +41,7 @@
#include "exprs/function_filter.h"
#include "io/cache/block_file_cache_profile.h"
#include "io/io_common.h"
+#include "olap/inverted_index_profile.h"
#include "olap/olap_common.h"
#include "olap/olap_tuple.h"
#include "olap/rowset/rowset.h"
@@ -628,6 +629,11 @@ void NewOlapScanner::_collect_profile_before_close() {
stats.inverted_index_searcher_cache_miss);
COUNTER_UPDATE(local_state->_inverted_index_downgrade_count_counter,
stats.inverted_index_downgrade_count);
+
+ InvertedIndexProfileReporter inverted_index_profile;
+ inverted_index_profile.update(local_state->_index_filter_profile.get(),
+ &stats.inverted_index_stats);
+
if (config::enable_file_cache) {
io::FileCacheProfileReporter
cache_profile(local_state->_segment_profile.get());
cache_profile.update(&stats.file_cache_stats);
diff --git a/be/test/olap/inverted_index_profile_test.cpp
b/be/test/olap/inverted_index_profile_test.cpp
new file mode 100644
index 00000000000..e3aa3555604
--- /dev/null
+++ b/be/test/olap/inverted_index_profile_test.cpp
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/inverted_index_profile.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+
+#include "olap/inverted_index_stats.h"
+
+namespace doris {
+
+// Checks that update() publishes one "fr_<column>" counter (hit rows) and one
+// "ft_<column>" counter (exec time) for every statistics entry.
+TEST(InvertedIndexProfileReporterTest, UpdateTest) {
+    auto runtime_profile = std::make_unique<RuntimeProfile>("test_profile");
+
+    InvertedIndexStatistics statistics;
+    statistics.stats.push_back({"test_column1", 101, 201});
+    statistics.stats.push_back({"test_column2", 102, 202});
+
+    InvertedIndexProfileReporter reporter;
+    reporter.update(runtime_profile.get(), &statistics);
+
+    // Look the counter up first and fail the test cleanly if it is missing,
+    // instead of dereferencing the lookup result unchecked.
+    auto expect_counter = [&](const std::string& name, int64_t expected) {
+        auto* counter = runtime_profile->get_counter(name);
+        ASSERT_NE(counter, nullptr) << "missing counter: " << name;
+        EXPECT_EQ(counter->value(), expected) << "counter: " << name;
+    };
+
+    expect_counter("fr_test_column1", 101);
+    expect_counter("ft_test_column1", 201);
+    expect_counter("fr_test_column2", 102);
+    expect_counter("ft_test_column2", 202);
+}
+
+} // namespace doris
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]