This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit fd62af82d2e3ec32f8ef2cf30f400eecb458cf72
Author: abmdocrt <[email protected]>
AuthorDate: Thu Mar 21 12:27:27 2024 +0800

    [enhancement](mow) Add bvar for bloom filter and segment (#32355)
---
 be/src/olap/primary_key_index.cpp                  |  7 ++++
 be/src/olap/primary_key_index.h                    | 10 ++++++
 be/src/olap/rowset/segment_v2/bloom_filter.h       | 39 +++++++++++++++++++++-
 .../segment_v2/bloom_filter_index_writer.cpp       |  5 +++
 .../rowset/segment_v2/bloom_filter_index_writer.h  |  9 ++++-
 be/src/olap/rowset/segment_v2/segment.cpp          |  7 +++-
 6 files changed, 74 insertions(+), 3 deletions(-)

diff --git a/be/src/olap/primary_key_index.cpp b/be/src/olap/primary_key_index.cpp
index b807b249a79..6d1b1772a91 100644
--- a/be/src/olap/primary_key_index.cpp
+++ b/be/src/olap/primary_key_index.cpp
@@ -109,6 +109,13 @@ Status PrimaryKeyIndexReader::parse_bf(io::FileReaderSPtr file_reader,
     std::unique_ptr<segment_v2::BloomFilterIndexIterator> bf_iter;
     RETURN_IF_ERROR(bf_index_reader.new_iterator(&bf_iter));
     RETURN_IF_ERROR(bf_iter->read_bloom_filter(0, &_bf));
+    segment_v2::g_pk_total_bloom_filter_num << 1;
+    segment_v2::g_pk_total_bloom_filter_total_bytes << _bf->size();
+    segment_v2::g_pk_read_bloom_filter_num << 1;
+    segment_v2::g_pk_read_bloom_filter_total_bytes << _bf->size();
+    _bf_num += 1;
+    _bf_bytes += _bf->size();
+
     _bf_parsed = true;
 
     return Status::OK();
diff --git a/be/src/olap/primary_key_index.h b/be/src/olap/primary_key_index.h
index 3fda3a763ea..618d11b60d9 100644
--- a/be/src/olap/primary_key_index.h
+++ b/be/src/olap/primary_key_index.h
@@ -39,6 +39,7 @@ namespace io {
 class FileWriter;
 } // namespace io
 namespace segment_v2 {
+
 class PrimaryKeyIndexMetaPB;
 } // namespace segment_v2
 
@@ -98,6 +99,13 @@ class PrimaryKeyIndexReader {
 public:
     PrimaryKeyIndexReader() : _index_parsed(false), _bf_parsed(false) {}
 
+    ~PrimaryKeyIndexReader() {
+        segment_v2::g_pk_total_bloom_filter_num << -_bf_num;
+        segment_v2::g_pk_total_bloom_filter_total_bytes << -_bf_bytes;
+        segment_v2::g_pk_read_bloom_filter_num << -_bf_num;
+        segment_v2::g_pk_read_bloom_filter_total_bytes << -_bf_bytes;
+    }
+
     Status parse_index(io::FileReaderSPtr file_reader,
                        const segment_v2::PrimaryKeyIndexMetaPB& meta);
 
@@ -142,6 +150,8 @@ private:
     bool _bf_parsed;
     std::unique_ptr<segment_v2::IndexedColumnReader> _index_reader;
     std::unique_ptr<segment_v2::BloomFilter> _bf;
+    size_t _bf_num = 0;
+    uint64 _bf_bytes = 0;
 };
 
 } // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/bloom_filter.h b/be/src/olap/rowset/segment_v2/bloom_filter.h
index cd57181cdb9..13b1558431e 100644
--- a/be/src/olap/rowset/segment_v2/bloom_filter.h
+++ b/be/src/olap/rowset/segment_v2/bloom_filter.h
@@ -17,6 +17,7 @@
 
 #pragma once
 
+#include <bvar/reducer.h>
 #include <gen_cpp/segment_v2.pb.h>
 #include <glog/logging.h>
 #include <string.h>
@@ -31,6 +32,22 @@
 namespace doris {
 namespace segment_v2 {
 
+inline bvar::Adder<size_t> g_total_bloom_filter_num("doris_total_bloom_filter_num");
+inline bvar::Adder<size_t> g_read_bloom_filter_num("doris_read_bloom_filter_num");
+inline bvar::Adder<size_t> g_write_bloom_filter_num("doris_write_bloom_filter_num");
+
+inline bvar::Adder<size_t> g_total_bloom_filter_total_bytes("doris_total_bloom_filter_bytes");
+inline bvar::Adder<size_t> g_read_bloom_filter_total_bytes("doris_read_bloom_filter_bytes");
+inline bvar::Adder<size_t> g_write_bloom_filter_total_bytes("doris_write_bloom_filter_bytes");
+
+inline bvar::Adder<size_t> g_pk_total_bloom_filter_num("doris_pk_total_bloom_filter_num");
+inline bvar::Adder<size_t> g_pk_read_bloom_filter_num("doris_pk_read_bloom_filter_num");
+inline bvar::Adder<size_t> g_pk_write_bloom_filter_num("doris_pk_write_bloom_filter_num");
+
+inline bvar::Adder<size_t> g_pk_total_bloom_filter_total_bytes("doris_pk_total_bloom_filter_bytes");
+inline bvar::Adder<size_t> g_pk_read_bloom_filter_total_bytes("doris_pk_read_bloom_filter_bytes");
+inline bvar::Adder<size_t> g_pk_write_bloom_filter_total_bytes("doris_pk_write_bloom_filter_bytes");
+
 struct BloomFilterOptions {
     // false positive probability
     double fpp = 0.05;
@@ -55,12 +72,23 @@ public:
     static Status create(BloomFilterAlgorithmPB algorithm, std::unique_ptr<BloomFilter>* bf,
                          size_t bf_size = 0);
 
-    BloomFilter() : _data(nullptr), _num_bytes(0), _size(0), _has_null(nullptr) {}
+    BloomFilter() : _data(nullptr), _num_bytes(0), _size(0), _has_null(nullptr) {
+        g_total_bloom_filter_num << 1;
+    }
 
     virtual ~BloomFilter() {
         if (_data) {
+            if (_is_write) {
+                g_write_bloom_filter_total_bytes << -_size;
+                g_write_bloom_filter_num << -1;
+            } else {
+                g_read_bloom_filter_total_bytes << -_size;
+                g_read_bloom_filter_num << -1;
+            }
+            g_total_bloom_filter_total_bytes << -_size;
             delete[] _data;
         }
+        g_total_bloom_filter_num << -1;
     }
 
     virtual bool is_ngram_bf() const { return false; }
@@ -86,6 +114,10 @@ public:
         memset(_data, 0, _size);
         _has_null = (bool*)(_data + _num_bytes);
         *_has_null = false;
+        _is_write = true;
+        g_write_bloom_filter_num << 1;
+        g_write_bloom_filter_total_bytes << _size;
+        g_total_bloom_filter_total_bytes << _size;
         return Status::OK();
     }
 
@@ -107,6 +139,9 @@ public:
         _num_bytes = _size - 1;
         DCHECK((_num_bytes & (_num_bytes - 1)) == 0);
         _has_null = (bool*)(_data + _num_bytes);
+        g_read_bloom_filter_num << 1;
+        g_read_bloom_filter_total_bytes << _size;
+        g_total_bloom_filter_total_bytes << _size;
         return Status::OK();
     }
 
@@ -181,6 +216,8 @@ protected:
     uint32_t _size;
     // last byte's pointer in data for null flag
     bool* _has_null = nullptr;
+    // is this bf used for write
+    bool _is_write = false;
 
 private:
     std::function<void(const void*, const int, const uint64_t, void*)> _hash_func;
diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
index e8bab57003a..27914280784 100644
--- a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
@@ -26,6 +26,7 @@
 #include <utility>
 
 #include "olap/olap_common.h"
+#include "olap/primary_key_index.h"
 #include "olap/rowset/segment_v2/bloom_filter.h" // for BloomFilterOptions, BloomFilter
 #include "olap/rowset/segment_v2/indexed_column_writer.h"
 #include "olap/types.h"
@@ -194,6 +195,10 @@ Status PrimaryKeyBloomFilterIndexWriterImpl::flush() {
         bf->add_bytes(s->data, s->size);
     }
     _bf_buffer_size += bf->size();
+    g_pk_total_bloom_filter_num << 1;
+    g_pk_total_bloom_filter_total_bytes << bf->size();
+    g_pk_write_bloom_filter_num << 1;
+    g_pk_write_bloom_filter_total_bytes << bf->size();
     _bfs.push_back(std::move(bf));
     _values.clear();
     _has_null = false;
diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.h b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.h
index dc4707e6e07..df92f980c58 100644
--- a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.h
+++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.h
@@ -76,7 +76,14 @@ public:
               _has_null(false),
               _bf_buffer_size(0) {}
 
-    ~PrimaryKeyBloomFilterIndexWriterImpl() override = default;
+    ~PrimaryKeyBloomFilterIndexWriterImpl() override {
+        for (auto& bf : _bfs) {
+            g_pk_total_bloom_filter_num << -1;
+            g_pk_total_bloom_filter_total_bytes << -bf->size();
+            g_pk_write_bloom_filter_num << -1;
+            g_pk_write_bloom_filter_total_bytes << -bf->size();
+        }
+    };
 
     void add_values(const void* values, size_t count) override;
 
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp
index e74c5d2a6b2..17539012a7b 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -74,6 +74,8 @@
 namespace doris {
 
 namespace segment_v2 {
+
+bvar::Adder<size_t> g_total_segment_num("doris_total_segment_num");
 class InvertedIndexIterator;
 
 Status Segment::open(io::FileSystemSPtr fs, const std::string& path, uint32_t segment_id,
@@ -94,9 +96,12 @@ Segment::Segment(uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr table
           _meta_mem_usage(0),
           _rowset_id(rowset_id),
           _tablet_schema(tablet_schema),
-          _segment_meta_mem_tracker(StorageEngine::instance()->segment_meta_mem_tracker()) {}
+          _segment_meta_mem_tracker(StorageEngine::instance()->segment_meta_mem_tracker()) {
+    g_total_segment_num << 1;
+}
 
 Segment::~Segment() {
+    g_total_segment_num << -1;
 #ifndef BE_TEST
     _segment_meta_mem_tracker->release(_meta_mem_usage);
 #endif


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to