This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit fd62af82d2e3ec32f8ef2cf30f400eecb458cf72 Author: abmdocrt <[email protected]> AuthorDate: Thu Mar 21 12:27:27 2024 +0800 [enhancement](mow) Add bvar for bloom filter and segment (#32355) --- be/src/olap/primary_key_index.cpp | 7 ++++ be/src/olap/primary_key_index.h | 10 ++++++ be/src/olap/rowset/segment_v2/bloom_filter.h | 39 +++++++++++++++++++++- .../segment_v2/bloom_filter_index_writer.cpp | 5 +++ .../rowset/segment_v2/bloom_filter_index_writer.h | 9 ++++- be/src/olap/rowset/segment_v2/segment.cpp | 7 +++- 6 files changed, 74 insertions(+), 3 deletions(-) diff --git a/be/src/olap/primary_key_index.cpp b/be/src/olap/primary_key_index.cpp index b807b249a79..6d1b1772a91 100644 --- a/be/src/olap/primary_key_index.cpp +++ b/be/src/olap/primary_key_index.cpp @@ -109,6 +109,13 @@ Status PrimaryKeyIndexReader::parse_bf(io::FileReaderSPtr file_reader, std::unique_ptr<segment_v2::BloomFilterIndexIterator> bf_iter; RETURN_IF_ERROR(bf_index_reader.new_iterator(&bf_iter)); RETURN_IF_ERROR(bf_iter->read_bloom_filter(0, &_bf)); + segment_v2::g_pk_total_bloom_filter_num << 1; + segment_v2::g_pk_total_bloom_filter_total_bytes << _bf->size(); + segment_v2::g_pk_read_bloom_filter_num << 1; + segment_v2::g_pk_read_bloom_filter_total_bytes << _bf->size(); + _bf_num += 1; + _bf_bytes += _bf->size(); + _bf_parsed = true; return Status::OK(); diff --git a/be/src/olap/primary_key_index.h b/be/src/olap/primary_key_index.h index 3fda3a763ea..618d11b60d9 100644 --- a/be/src/olap/primary_key_index.h +++ b/be/src/olap/primary_key_index.h @@ -39,6 +39,7 @@ namespace io { class FileWriter; } // namespace io namespace segment_v2 { + class PrimaryKeyIndexMetaPB; } // namespace segment_v2 @@ -98,6 +99,13 @@ class PrimaryKeyIndexReader { public: PrimaryKeyIndexReader() : _index_parsed(false), _bf_parsed(false) {} + ~PrimaryKeyIndexReader() { + segment_v2::g_pk_total_bloom_filter_num << -_bf_num; + segment_v2::g_pk_total_bloom_filter_total_bytes << -_bf_bytes; + segment_v2::g_pk_read_bloom_filter_num << -_bf_num; + segment_v2::g_pk_read_bloom_filter_total_bytes << -_bf_bytes; + } + Status parse_index(io::FileReaderSPtr file_reader, const segment_v2::PrimaryKeyIndexMetaPB& meta); @@ -142,6 +150,8 @@ private: bool _bf_parsed; std::unique_ptr<segment_v2::IndexedColumnReader> _index_reader; std::unique_ptr<segment_v2::BloomFilter> _bf; + size_t _bf_num = 0; + uint64 _bf_bytes = 0; }; } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/bloom_filter.h b/be/src/olap/rowset/segment_v2/bloom_filter.h index cd57181cdb9..13b1558431e 100644 --- a/be/src/olap/rowset/segment_v2/bloom_filter.h +++ b/be/src/olap/rowset/segment_v2/bloom_filter.h @@ -17,6 +17,7 @@ #pragma once +#include <bvar/reducer.h> #include <gen_cpp/segment_v2.pb.h> #include <glog/logging.h> #include <string.h> @@ -31,6 +32,22 @@ namespace doris { namespace segment_v2 { +inline bvar::Adder<size_t> g_total_bloom_filter_num("doris_total_bloom_filter_num"); +inline bvar::Adder<size_t> g_read_bloom_filter_num("doris_read_bloom_filter_num"); +inline bvar::Adder<size_t> g_write_bloom_filter_num("doris_write_bloom_filter_num"); + +inline bvar::Adder<size_t> g_total_bloom_filter_total_bytes("doris_total_bloom_filter_bytes"); +inline bvar::Adder<size_t> g_read_bloom_filter_total_bytes("doris_read_bloom_filter_bytes"); +inline bvar::Adder<size_t> g_write_bloom_filter_total_bytes("doris_write_bloom_filter_bytes"); + +inline bvar::Adder<size_t> g_pk_total_bloom_filter_num("doris_pk_total_bloom_filter_num"); +inline bvar::Adder<size_t> g_pk_read_bloom_filter_num("doris_pk_read_bloom_filter_num"); +inline bvar::Adder<size_t> g_pk_write_bloom_filter_num("doris_pk_write_bloom_filter_num"); + +inline bvar::Adder<size_t> g_pk_total_bloom_filter_total_bytes("doris_pk_total_bloom_filter_bytes"); +inline bvar::Adder<size_t> g_pk_read_bloom_filter_total_bytes("doris_pk_read_bloom_filter_bytes"); +inline bvar::Adder<size_t> g_pk_write_bloom_filter_total_bytes("doris_pk_write_bloom_filter_bytes"); + struct BloomFilterOptions { // false positive probability double fpp = 0.05; @@ -55,12 +72,23 @@ public: static Status create(BloomFilterAlgorithmPB algorithm, std::unique_ptr<BloomFilter>* bf, size_t bf_size = 0); - BloomFilter() : _data(nullptr), _num_bytes(0), _size(0), _has_null(nullptr) {} + BloomFilter() : _data(nullptr), _num_bytes(0), _size(0), _has_null(nullptr) { + g_total_bloom_filter_num << 1; + } virtual ~BloomFilter() { if (_data) { + if (_is_write) { + g_write_bloom_filter_total_bytes << -_size; + g_write_bloom_filter_num << -1; + } else { + g_read_bloom_filter_total_bytes << -_size; + g_read_bloom_filter_num << -1; + } + g_total_bloom_filter_total_bytes << -_size; delete[] _data; } + g_total_bloom_filter_num << -1; } virtual bool is_ngram_bf() const { return false; } @@ -86,6 +114,10 @@ public: memset(_data, 0, _size); _has_null = (bool*)(_data + _num_bytes); *_has_null = false; + _is_write = true; + g_write_bloom_filter_num << 1; + g_write_bloom_filter_total_bytes << _size; + g_total_bloom_filter_total_bytes << _size; return Status::OK(); } @@ -107,6 +139,9 @@ public: _num_bytes = _size - 1; DCHECK((_num_bytes & (_num_bytes - 1)) == 0); _has_null = (bool*)(_data + _num_bytes); + g_read_bloom_filter_num << 1; + g_read_bloom_filter_total_bytes << _size; + g_total_bloom_filter_total_bytes << _size; return Status::OK(); } @@ -181,6 +216,8 @@ protected: uint32_t _size; // last byte's pointer in data for null flag bool* _has_null = nullptr; + // is this bf used for write + bool _is_write = false; private: std::function<void(const void*, const int, const uint64_t, void*)> _hash_func; diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp index e8bab57003a..27914280784 100644 --- a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp +++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp @@ -26,6 +26,7 @@ #include <utility> #include "olap/olap_common.h" +#include "olap/primary_key_index.h" #include "olap/rowset/segment_v2/bloom_filter.h" // for BloomFilterOptions, BloomFilter #include "olap/rowset/segment_v2/indexed_column_writer.h" #include "olap/types.h" @@ -194,6 +195,10 @@ Status PrimaryKeyBloomFilterIndexWriterImpl::flush() { bf->add_bytes(s->data, s->size); } _bf_buffer_size += bf->size(); + g_pk_total_bloom_filter_num << 1; + g_pk_total_bloom_filter_total_bytes << bf->size(); + g_pk_write_bloom_filter_num << 1; + g_pk_write_bloom_filter_total_bytes << bf->size(); _bfs.push_back(std::move(bf)); _values.clear(); _has_null = false; diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.h b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.h index dc4707e6e07..df92f980c58 100644 --- a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.h +++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.h @@ -76,7 +76,14 @@ public: _has_null(false), _bf_buffer_size(0) {} - ~PrimaryKeyBloomFilterIndexWriterImpl() override = default; + ~PrimaryKeyBloomFilterIndexWriterImpl() override { + for (auto& bf : _bfs) { + g_pk_total_bloom_filter_num << -1; + g_pk_total_bloom_filter_total_bytes << -bf->size(); + g_pk_write_bloom_filter_num << -1; + g_pk_write_bloom_filter_total_bytes << -bf->size(); + } + }; void add_values(const void* values, size_t count) override; diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index e74c5d2a6b2..17539012a7b 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ -74,6 +74,8 @@ namespace doris { namespace segment_v2 { + +bvar::Adder<size_t> g_total_segment_num("doris_total_segment_num"); class InvertedIndexIterator; Status Segment::open(io::FileSystemSPtr fs, const std::string& path, uint32_t segment_id, @@ -94,9 +96,12 @@ Segment::Segment(uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr table _meta_mem_usage(0), _rowset_id(rowset_id), _tablet_schema(tablet_schema), - _segment_meta_mem_tracker(StorageEngine::instance()->segment_meta_mem_tracker()) {} + _segment_meta_mem_tracker(StorageEngine::instance()->segment_meta_mem_tracker()) { + g_total_segment_num << 1; +} Segment::~Segment() { + g_total_segment_num << -1; #ifndef BE_TEST _segment_meta_mem_tracker->release(_meta_mem_usage); #endif --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
