This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new cd3e1dce74f [feature](inverted index) Add profile statistics for each condition in inverted index filters (#48459)
cd3e1dce74f is described below

commit cd3e1dce74fa3a4f4e271e2af56b1754f1e147ec
Author: zzzxl <[email protected]>
AuthorDate: Sat Mar 1 11:00:19 2025 +0800

    [feature](inverted index) Add profile statistics for each condition in inverted index filters (#48459)
    
    https://github.com/apache/doris/pull/47504
---
 be/src/olap/inverted_index_profile.h               | 57 ++++++++++++++++++++++
 be/src/olap/inverted_index_stats.h                 | 34 +++++++++++++
 be/src/olap/olap_common.h                          |  2 +
 .../rowset/segment_v2/inverted_index_reader.cpp    | 20 +++++++-
 be/src/pipeline/exec/olap_scan_operator.cpp        |  4 ++
 be/src/pipeline/exec/olap_scan_operator.h          |  1 +
 be/src/vec/exec/scan/new_olap_scan_node.cpp        |  4 ++
 be/src/vec/exec/scan/new_olap_scan_node.h          |  1 +
 be/src/vec/exec/scan/new_olap_scanner.cpp          |  4 ++
 be/test/olap/inverted_index_profile_test.cpp       | 44 +++++++++++++++++
 10 files changed, 169 insertions(+), 2 deletions(-)

diff --git a/be/src/olap/inverted_index_profile.h b/be/src/olap/inverted_index_profile.h
new file mode 100644
index 00000000000..9255e41dc55
--- /dev/null
+++ b/be/src/olap/inverted_index_profile.h
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "olap/inverted_index_stats.h"
+#include "util/runtime_profile.h"
+
+namespace doris {
+
+// Publishes the per-condition inverted index filter statistics of a scan as counters
+// on a RuntimeProfile.
+class InvertedIndexProfileReporter {
+public:
+    InvertedIndexProfileReporter() = default;
+    ~InvertedIndexProfileReporter() = default;
+
+    // Adds the counters for the recorded conditions to `profile`.
+    //
+    // For each of the first `max_reported_conditions` entries of `statistics->stats`:
+    //   - "fr_<column_name>" (TUnit::UNIT, child of "HitRows") is updated with hit_rows;
+    //   - "ft_<column_name>" (TUnit::TIME_NS, child of "ExecTime") is updated with exec_time.
+    // Entries beyond the cap are not reported. Nothing is added to the profile when either
+    // pointer is null or when no condition was recorded.
+    void update(RuntimeProfile* profile, const InvertedIndexStatistics* statistics) {
+        if (profile == nullptr || statistics == nullptr || statistics->stats.empty()) {
+            return;
+        }
+
+        // The parent counters are the same for every condition, so register them once
+        // instead of once per loop iteration.
+        ADD_TIMER_WITH_LEVEL(profile, hit_rows_name, 1);
+        ADD_TIMER_WITH_LEVEL(profile, exec_time_name, 1);
+
+        // Report at most `max_reported_conditions` conditions per call.
+        const size_t iteration_limit =
+                std::min<size_t>(max_reported_conditions, statistics->stats.size());
+
+        for (size_t i = 0; i < iteration_limit; ++i) {
+            const auto& stats = statistics->stats[i];
+
+            auto* hit_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "fr_" + stats.column_name,
+                                                          TUnit::UNIT, hit_rows_name, 1);
+            COUNTER_UPDATE(hit_rows, stats.hit_rows);
+
+            auto* exec_time = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "ft_" + stats.column_name,
+                                                           TUnit::TIME_NS, exec_time_name, 1);
+            COUNTER_UPDATE(exec_time, stats.exec_time);
+        }
+    }
+
+private:
+    // Upper bound on the number of conditions reported by a single update() call.
+    static constexpr size_t max_reported_conditions = 20;
+    // Names of the parent counters the per-column counters are attached to.
+    static constexpr const char* hit_rows_name = "HitRows";
+    static constexpr const char* exec_time_name = "ExecTime";
+};
+
+} // namespace doris
diff --git a/be/src/olap/inverted_index_stats.h b/be/src/olap/inverted_index_stats.h
new file mode 100644
index 00000000000..b82b230f41d
--- /dev/null
+++ b/be/src/olap/inverted_index_stats.h
@@ -0,0 +1,34 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+namespace doris {
+
+// Statistics recorded for a single inverted index query (one filter condition).
+struct InvertedIndexQueryStatistics {
+    // Name of the column the query was issued against.
+    std::string column_name;
+    // Number of rows matched by the query (cardinality of its result bitmap).
+    int64_t hit_rows{0};
+    // Time spent executing the query; published in the profile as TUnit::TIME_NS.
+    int64_t exec_time{0};
+};
+
+// Container of the per-query inverted index statistics; embedded in
+// OlapReaderStatistics as `inverted_index_stats`.
+struct InvertedIndexStatistics {
+    // One entry per recorded inverted index query, in the order they were appended.
+    std::vector<InvertedIndexQueryStatistics> stats;
+};
+
+} // namespace doris
diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h
index 77d5228de41..044b7eb45d0 100644
--- a/be/src/olap/olap_common.h
+++ b/be/src/olap/olap_common.h
@@ -36,6 +36,7 @@
 
 #include "common/config.h"
 #include "io/io_common.h"
+#include "olap/inverted_index_stats.h"
 #include "olap/olap_define.h"
 #include "olap/rowset/rowset_fwd.h"
 #include "util/hash_util.hpp"
@@ -377,6 +378,7 @@ struct OlapReaderStatistics {
     int64_t inverted_index_searcher_search_timer = 0;
     int64_t inverted_index_searcher_cache_hit = 0;
     int64_t inverted_index_searcher_cache_miss = 0;
+    InvertedIndexStatistics inverted_index_stats;
 
     int64_t output_index_result_column_timer = 0;
     // number of segment filtered by column stat when creating seg iterator
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
index 1a20f84a1bd..7281c3a6fe2 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
@@ -1220,8 +1220,24 @@ Status InvertedIndexIterator::read_from_inverted_index(
         }
     }
 
-    RETURN_IF_ERROR(
-            _reader->query(_stats, _runtime_state, column_name, query_value, 
query_type, bit_map));
+    auto execute_query = [&]() {
+        return _reader->query(_stats, _runtime_state, column_name, 
query_value, query_type,
+                              bit_map);
+    };
+
+    if (_runtime_state->query_options().enable_profile) {
+        InvertedIndexQueryStatistics query_stats;
+        {
+            SCOPED_RAW_TIMER(&query_stats.exec_time);
+            RETURN_IF_ERROR(execute_query());
+        }
+        query_stats.column_name = column_name;
+        query_stats.hit_rows = bit_map->cardinality();
+        _stats->inverted_index_stats.stats.emplace_back(query_stats);
+    } else {
+        RETURN_IF_ERROR(execute_query());
+    }
+
     return Status::OK();
 }
 
diff --git a/be/src/pipeline/exec/olap_scan_operator.cpp b/be/src/pipeline/exec/olap_scan_operator.cpp
index aa7413b4a16..63735161c56 100644
--- a/be/src/pipeline/exec/olap_scan_operator.cpp
+++ b/be/src/pipeline/exec/olap_scan_operator.cpp
@@ -190,6 +190,10 @@ Status OlapScanLocalState::_init_profile() {
     _segment_create_column_readers_timer =
             ADD_TIMER(_scanner_profile, "SegmentCreateColumnReadersTimer");
     _segment_load_index_timer = ADD_TIMER(_scanner_profile, 
"SegmentLoadIndexTimer");
+
+    _index_filter_profile = std::make_unique<RuntimeProfile>("IndexFilter");
+    _scanner_profile->add_child(_index_filter_profile.get(), true, nullptr);
+
     return Status::OK();
 }
 
diff --git a/be/src/pipeline/exec/olap_scan_operator.h b/be/src/pipeline/exec/olap_scan_operator.h
index de00cd4f37a..37c460a5756 100644
--- a/be/src/pipeline/exec/olap_scan_operator.h
+++ b/be/src/pipeline/exec/olap_scan_operator.h
@@ -97,6 +97,7 @@ private:
     std::set<int32_t> _maybe_read_column_ids;
 
     std::unique_ptr<RuntimeProfile> _segment_profile;
+    std::unique_ptr<RuntimeProfile> _index_filter_profile;
 
     RuntimeProfile::Counter* _num_disks_accessed_counter = nullptr;
 
diff --git a/be/src/vec/exec/scan/new_olap_scan_node.cpp b/be/src/vec/exec/scan/new_olap_scan_node.cpp
index 16236b96e23..4415960ff30 100644
--- a/be/src/vec/exec/scan/new_olap_scan_node.cpp
+++ b/be/src/vec/exec/scan/new_olap_scan_node.cpp
@@ -195,6 +195,10 @@ Status NewOlapScanNode::_init_profile() {
     _total_segment_counter = ADD_COUNTER(_segment_profile, "NumSegmentTotal", 
TUnit::UNIT);
 
     _runtime_filter_info = ADD_LABEL_COUNTER_WITH_LEVEL(_runtime_profile, 
"RuntimeFilterInfo", 1);
+
+    _index_filter_profile = std::make_unique<RuntimeProfile>("IndexFilter");
+    _scanner_profile->add_child(_index_filter_profile.get(), true, nullptr);
+
     return Status::OK();
 }
 
diff --git a/be/src/vec/exec/scan/new_olap_scan_node.h b/be/src/vec/exec/scan/new_olap_scan_node.h
index fd634dbdae4..dfa1842e77f 100644
--- a/be/src/vec/exec/scan/new_olap_scan_node.h
+++ b/be/src/vec/exec/scan/new_olap_scan_node.h
@@ -122,6 +122,7 @@ private:
 
 private:
     std::unique_ptr<RuntimeProfile> _segment_profile;
+    std::unique_ptr<RuntimeProfile> _index_filter_profile;
 
     RuntimeProfile::Counter* _num_disks_accessed_counter = nullptr;
 
diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp
index 6ffb60f425b..237ca738f4d 100644
--- a/be/src/vec/exec/scan/new_olap_scanner.cpp
+++ b/be/src/vec/exec/scan/new_olap_scanner.cpp
@@ -37,6 +37,7 @@
 #include "exprs/function_filter.h"
 #include "io/cache/block/block_file_cache_profile.h"
 #include "io/io_common.h"
+#include "olap/inverted_index_profile.h"
 #include "olap/olap_common.h"
 #include "olap/olap_tuple.h"
 #include "olap/rowset/rowset.h"
@@ -638,6 +639,9 @@ void NewOlapScanner::_collect_profile_before_close() {
                    stats.inverted_index_searcher_cache_hit);                   
                   \
     COUNTER_UPDATE(Parent->_inverted_index_searcher_cache_miss_counter,        
                   \
                    stats.inverted_index_searcher_cache_miss);                  
                   \
+    InvertedIndexProfileReporter inverted_index_profile;                       
                   \
+    inverted_index_profile.update(Parent->_index_filter_profile.get(),         
                   \
+                                  &stats.inverted_index_stats);                
                   \
     if (config::enable_file_cache) {                                           
                   \
         io::FileCacheProfileReporter 
cache_profile(Parent->_segment_profile.get());               \
         cache_profile.update(&stats.file_cache_stats);                         
                   \
diff --git a/be/test/olap/inverted_index_profile_test.cpp b/be/test/olap/inverted_index_profile_test.cpp
new file mode 100644
index 00000000000..e3aa3555604
--- /dev/null
+++ b/be/test/olap/inverted_index_profile_test.cpp
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/inverted_index_profile.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+
+#include "olap/inverted_index_stats.h"
+
+namespace doris {
+
+// Verifies that update() publishes, for every recorded condition, a "fr_<column>" counter
+// holding hit_rows and a "ft_<column>" counter holding exec_time.
+TEST(InvertedIndexProfileReporterTest, UpdateTest) {
+    auto runtime_profile = std::make_unique<RuntimeProfile>("test_profile");
+
+    InvertedIndexStatistics statistics;
+    statistics.stats.push_back({"test_column1", 101, 201});
+    statistics.stats.push_back({"test_column2", 102, 202});
+
+    InvertedIndexProfileReporter reporter;
+    reporter.update(runtime_profile.get(), &statistics);
+
+    // Check the looked-up pointer before using it, so that a counter that was not
+    // registered fails the test instead of crashing it through a null dereference.
+    auto expect_counter = [&](const std::string& name, int64_t expected) {
+        auto* counter = runtime_profile->get_counter(name);
+        ASSERT_NE(counter, nullptr) << "missing counter " << name;
+        ASSERT_EQ(counter->value(), expected) << "counter " << name;
+    };
+
+    expect_counter("fr_test_column1", 101);
+    expect_counter("ft_test_column1", 201);
+    expect_counter("fr_test_column2", 102);
+    expect_counter("ft_test_column2", 202);
+}
+
+} // namespace doris
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to