This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 3c85740a1e5 branch-2.1: [Opt](metric) Add metrics for invalid delete bitmap key count (#51654)
3c85740a1e5 is described below

commit 3c85740a1e5735e93292e3f8f6a10246018eea93
Author: bobhan1 <[email protected]>
AuthorDate: Fri Jun 13 11:24:54 2025 +0800

    branch-2.1: [Opt](metric) Add metrics for invalid delete bitmap key count (#51654)
    
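    ### What problem does this PR solve?
    
    Adds two BE gauge metrics that count delete bitmap keys across all
    tablets: keys whose rowset id the tablet still contains ("valid") and
    keys whose rowset id it no longer contains ("invalid"). A daemon thread
    refreshes them every `report_delete_bitmap_metrics_interval_s` seconds
    (default 60) when `enable_report_delete_bitmap_metrics` is true
    (default false).
    
    Example output:
    
    ```
    # TYPE doris_be_invalid_delete_bitmap_key_count gauge
    doris_be_invalid_delete_bitmap_key_count 0
    # TYPE doris_be_valid_delete_bitmap_key_count gauge
    doris_be_valid_delete_bitmap_key_count 409
    ```
    https://github.com/apache/doris/pull/50080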
---
 be/src/common/config.cpp       |  3 +++
 be/src/common/config.h         |  3 +++
 be/src/common/daemon.cpp       | 22 ++++++++++++++++++++++
 be/src/common/daemon.h         |  1 +
 be/src/olap/tablet.cpp         | 23 +++++++++++++++++++++++
 be/src/olap/tablet.h           |  3 +++
 be/src/olap/tablet_manager.cpp | 16 ++++++++++++++++
 be/src/olap/tablet_manager.h   |  2 ++
 be/src/olap/tablet_meta.cpp    | 21 +++++++++++++++++++++
 be/src/olap/tablet_meta.h      |  4 ++++
 be/src/util/doris_metrics.cpp  |  4 ++++
 be/src/util/doris_metrics.h    |  2 ++
 12 files changed, 104 insertions(+)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 41fea7aa651..7db8f200600 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1373,6 +1373,9 @@ DEFINE_mInt32(load_trigger_compaction_version_percent, 
"66");
 DEFINE_mInt64(base_compaction_interval_seconds_since_last_operation, "86400");
 DEFINE_mBool(enable_compaction_pause_on_high_memory, "true");
 
+DEFINE_mBool(enable_report_delete_bitmap_metrics, "false");
+DEFINE_mInt32(report_delete_bitmap_metrics_interval_s, "60");
+
 // clang-format off
 #ifdef BE_TEST
 // test s3
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 1bb1ea0ccc2..c0a60419cea 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1445,6 +1445,9 @@ DECLARE_mInt32(load_trigger_compaction_version_percent);
 DECLARE_mInt64(base_compaction_interval_seconds_since_last_operation);
 DECLARE_mBool(enable_compaction_pause_on_high_memory);
 
+DECLARE_mBool(enable_report_delete_bitmap_metrics);
+DECLARE_mInt32(report_delete_bitmap_metrics_interval_s);
+
 #ifdef BE_TEST
 // test s3
 DECLARE_String(test_s3_resource);
diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp
index 11050c233c7..60f875ff1d4 100644
--- a/be/src/common/daemon.cpp
+++ b/be/src/common/daemon.cpp
@@ -365,6 +365,23 @@ void Daemon::report_runtime_query_statistics_thread() {
     }
 }
 
+void Daemon::report_delete_bitmap_metrics_thread() {
+    while (!_stop_background_threads_latch.wait_for(
+            
std::chrono::seconds(config::report_delete_bitmap_metrics_interval_s))) {
+        if (config::enable_report_delete_bitmap_metrics) {
+            auto* metrics = DorisMetrics::instance();
+            metrics->valid_delete_bitmap_key_count->set_value(
+                    StorageEngine::instance()
+                            ->tablet_manager()
+                            ->get_valid_delete_bitmap_key_count());
+            metrics->invalid_delete_bitmap_key_count->set_value(
+                    StorageEngine::instance()
+                            ->tablet_manager()
+                            ->get_invalid_delete_bitmap_key_count());
+        }
+    }
+}
+
 void Daemon::je_purge_dirty_pages_thread() const {
     do {
         std::unique_lock<std::mutex> 
l(doris::MemInfo::je_purge_dirty_pages_lock);
@@ -455,6 +472,11 @@ void Daemon::start() {
             [this]() { this->report_runtime_query_statistics_thread(); }, 
&_threads.emplace_back());
     CHECK(st.ok()) << st;
 
+    st = Thread::create(
+            "Daemon", "delete_bitmap_metrics_thread",
+            [this]() { this->report_delete_bitmap_metrics_thread(); }, 
&_threads.emplace_back());
+    CHECK(st.ok()) << st;
+
     st = Thread::create(
             "Daemon", "wg_weighted_memory_ratio_refresh_thread",
             [this]() { this->wg_weighted_memory_ratio_refresh_thread(); },
diff --git a/be/src/common/daemon.h b/be/src/common/daemon.h
index 9674b139f00..85b41e6f4ca 100644
--- a/be/src/common/daemon.h
+++ b/be/src/common/daemon.h
@@ -45,6 +45,7 @@ private:
     void je_purge_dirty_pages_thread() const;
     void cache_prune_stale_thread();
     void report_runtime_query_statistics_thread();
+    void report_delete_bitmap_metrics_thread();
     void wg_weighted_memory_ratio_refresh_thread();
     void calculate_workload_group_metrics_thread();
 
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index fe229202701..4661f5eae8d 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -4279,4 +4279,27 @@ Status Tablet::show_nested_index_file(std::string* 
json_meta) {
     return Status::OK();
 }
 
+uint64_t Tablet::valid_delete_bitmap_key_count() {
+    uint64_t ret {0};
+    std::shared_lock<std::shared_mutex> rlock {_meta_lock};
+    _tablet_meta->delete_bitmap().traverse_rowset_id_prefix(
+            [&](const DeleteBitmap& self, const RowsetId& rowset_id) {
+                if (_contains_rowset(rowset_id)) {
+                    ret += self.count_key_with_rowset_id_unlocked(rowset_id);
+                }
+            });
+    return ret;
+}
+
+uint64_t Tablet::invalid_delete_bitmap_key_count() {
+    uint64_t ret {0};
+    std::shared_lock<std::shared_mutex> rlock {_meta_lock};
+    _tablet_meta->delete_bitmap().traverse_rowset_id_prefix(
+            [&](const DeleteBitmap& self, const RowsetId& rowset_id) {
+                if (!_contains_rowset(rowset_id)) {
+                    ret += self.count_key_with_rowset_id_unlocked(rowset_id);
+                }
+            });
+    return ret;
+}
 } // namespace doris
diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index 362ffcd2e06..a3f2c40a612 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -614,6 +614,9 @@ public:
                                int32_t* rowset_count, int64_t* file_count);
     Status show_nested_index_file(std::string* json_meta);
 
+    uint64_t valid_delete_bitmap_key_count();
+    uint64_t invalid_delete_bitmap_key_count();
+
 private:
     Status _init_once_action();
     void _print_missed_versions(const std::vector<Version>& missed_versions) 
const;
diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp
index 63c30b1f2ce..911b758bc81 100644
--- a/be/src/olap/tablet_manager.cpp
+++ b/be/src/olap/tablet_manager.cpp
@@ -685,6 +685,22 @@ uint64_t TabletManager::get_segment_nums() {
     return segment_nums;
 }
 
+uint64_t TabletManager::get_valid_delete_bitmap_key_count() {
+    uint64_t valid_delete_bitmap_key_count = 0;
+    for_each_tablet([&](const TabletSharedPtr& tablet) {
+        valid_delete_bitmap_key_count += 
tablet->valid_delete_bitmap_key_count();
+    });
+    return valid_delete_bitmap_key_count;
+}
+
+uint64_t TabletManager::get_invalid_delete_bitmap_key_count() {
+    uint64_t invalid_delete_bitmap_key_count = 0;
+    for_each_tablet([&](const TabletSharedPtr& tablet) {
+        invalid_delete_bitmap_key_count += 
tablet->invalid_delete_bitmap_key_count();
+    });
+    return invalid_delete_bitmap_key_count;
+}
+
 bool TabletManager::get_tablet_id_and_schema_hash_from_path(const string& path,
                                                             TTabletId* 
tablet_id,
                                                             TSchemaHash* 
schema_hash) {
diff --git a/be/src/olap/tablet_manager.h b/be/src/olap/tablet_manager.h
index f4f3765df65..bf000fad95b 100644
--- a/be/src/olap/tablet_manager.h
+++ b/be/src/olap/tablet_manager.h
@@ -97,6 +97,8 @@ public:
 
     uint64_t get_rowset_nums();
     uint64_t get_segment_nums();
+    uint64_t get_valid_delete_bitmap_key_count();
+    uint64_t get_invalid_delete_bitmap_key_count();
 
     // Extract tablet_id and schema_hash from given path.
     //
diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp
index 7db33c66151..c5fdb21479d 100644
--- a/be/src/olap/tablet_meta.cpp
+++ b/be/src/olap/tablet_meta.cpp
@@ -27,6 +27,7 @@
 #include <time.h>
 
 #include <cstdint>
+#include <limits>
 #include <memory>
 #include <set>
 #include <utility>
@@ -1159,6 +1160,26 @@ bool 
DeleteBitmap::has_calculated_for_multi_segments(const RowsetId& rowset_id)
     return contains({rowset_id, INVALID_SEGMENT_ID, TEMP_VERSION_COMMON}, 
ROWSET_SENTINEL_MARK);
 }
 
+void DeleteBitmap::traverse_rowset_id_prefix(
+        const std::function<void(const DeleteBitmap&, const RowsetId& 
rowsetId)>& func) const {
+    std::shared_lock rlock {lock};
+    auto it = delete_bitmap.cbegin();
+    while (it != delete_bitmap.cend()) {
+        RowsetId rowset_id = std::get<0>(it->first);
+        func(*this, rowset_id);
+        // find next rowset id
+        it = delete_bitmap.upper_bound({rowset_id, 
std::numeric_limits<SegmentId>::max(),
+                                        std::numeric_limits<Version>::max()});
+    }
+}
+
+uint64_t DeleteBitmap::count_key_with_rowset_id_unlocked(const RowsetId& 
rowset_id) const {
+    auto lower_bound = delete_bitmap.lower_bound({rowset_id, 0, 0});
+    auto upper_bound = delete_bitmap.upper_bound({rowset_id, 
std::numeric_limits<SegmentId>::max(),
+                                                  
std::numeric_limits<Version>::max()});
+    return std::distance(lower_bound, upper_bound);
+}
+
 // We cannot just copy the underlying memory to construct a string
 // due to equivalent objects may have different padding bytes.
 // Reading padding bytes is undefined behavior, neither copy nor
diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h
index 7767e6d2184..ba7dbbaf20d 100644
--- a/be/src/olap/tablet_meta.h
+++ b/be/src/olap/tablet_meta.h
@@ -511,6 +511,10 @@ public:
 
     bool has_calculated_for_multi_segments(const RowsetId& rowset_id) const;
 
+    void traverse_rowset_id_prefix(
+            const std::function<void(const DeleteBitmap&, const RowsetId& 
rowsetId)>& func) const;
+    uint64_t count_key_with_rowset_id_unlocked(const RowsetId& rowset_id) 
const;
+
     class AggCachePolicy : public LRUCachePolicyTrackingManual {
     public:
         AggCachePolicy(size_t capacity)
diff --git a/be/src/util/doris_metrics.cpp b/be/src/util/doris_metrics.cpp
index d55c09a4b94..76f93fbb969 100644
--- a/be/src/util/doris_metrics.cpp
+++ b/be/src/util/doris_metrics.cpp
@@ -137,6 +137,8 @@ 
DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(tablet_base_max_compaction_score, MetricUnit:
 
 DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(all_rowsets_num, MetricUnit::NOUNIT);
 DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(all_segments_num, MetricUnit::NOUNIT);
+DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(valid_delete_bitmap_key_count, 
MetricUnit::NOUNIT);
+DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(invalid_delete_bitmap_key_count, 
MetricUnit::NOUNIT);
 
 DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(compaction_used_permits, 
MetricUnit::NOUNIT);
 DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(compaction_waitting_permits, 
MetricUnit::NOUNIT);
@@ -269,6 +271,8 @@ DorisMetrics::DorisMetrics() : 
_metric_registry(_s_registry_name) {
 
     INT_GAUGE_METRIC_REGISTER(_server_metric_entity, all_rowsets_num);
     INT_GAUGE_METRIC_REGISTER(_server_metric_entity, all_segments_num);
+    INT_GAUGE_METRIC_REGISTER(_server_metric_entity, 
valid_delete_bitmap_key_count);
+    INT_GAUGE_METRIC_REGISTER(_server_metric_entity, 
invalid_delete_bitmap_key_count);
 
     INT_GAUGE_METRIC_REGISTER(_server_metric_entity, compaction_used_permits);
     INT_GAUGE_METRIC_REGISTER(_server_metric_entity, 
compaction_waitting_permits);
diff --git a/be/src/util/doris_metrics.h b/be/src/util/doris_metrics.h
index 2f1dc4da557..0b37882f77d 100644
--- a/be/src/util/doris_metrics.h
+++ b/be/src/util/doris_metrics.h
@@ -133,6 +133,8 @@ public:
 
     IntGauge* all_rowsets_num = nullptr;
     IntGauge* all_segments_num = nullptr;
+    IntGauge* valid_delete_bitmap_key_count {nullptr};
+    IntGauge* invalid_delete_bitmap_key_count {nullptr};
 
     // permits have been used for all compaction tasks
     IntGauge* compaction_used_permits = nullptr;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to