This is an automated email from the ASF dual-hosted git repository.

jianliangqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new f44d2cf296 [Feature](inverted index) add inverted index size to tablet 
meta (#20916)
f44d2cf296 is described below

commit f44d2cf296a7289f7f2751256f8184de163e85ec
Author: airborne12 <[email protected]>
AuthorDate: Mon Jun 19 10:26:10 2023 +0800

    [Feature](inverted index) add inverted index size to tablet meta (#20916)
    
    1. Get the inverted index size before the segment writer's column writers
are cleared, then add that size to both the total data size and the total index size.
    2. Do the same in vertical compaction.
---
 be/src/olap/rowset/beta_rowset_writer.cpp          | 8 ++++----
 be/src/olap/rowset/segment_v2/segment_writer.cpp   | 4 ++--
 be/src/olap/rowset/segment_v2/segment_writer.h     | 4 +++-
 be/src/olap/rowset/vertical_beta_rowset_writer.cpp | 5 +++--
 4 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp 
b/be/src/olap/rowset/beta_rowset_writer.cpp
index 7f330a80b7..680645d751 100644
--- a/be/src/olap/rowset/beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/beta_rowset_writer.cpp
@@ -802,8 +802,8 @@ Status 
BetaRowsetWriter::_flush_segment_writer(std::unique_ptr<segment_v2::Segme
 
     Statistics segstat;
     segstat.row_num = row_num;
-    segstat.data_size = segment_size;
-    segstat.index_size = index_size;
+    segstat.data_size = segment_size + 
(*writer)->get_inverted_index_file_size();
+    segstat.index_size = index_size + 
(*writer)->get_inverted_index_file_size();
     segstat.key_bounds = key_bounds;
     {
         std::lock_guard<std::mutex> lock(_segid_statistics_map_mutex);
@@ -844,8 +844,8 @@ Status 
BetaRowsetWriter::flush_segment_writer_for_segcompaction(
 
     Statistics segstat;
     segstat.row_num = row_num;
-    segstat.data_size = segment_size;
-    segstat.index_size = index_size;
+    segstat.data_size = segment_size + 
(*writer)->get_inverted_index_file_size();
+    segstat.index_size = index_size + 
(*writer)->get_inverted_index_file_size();
     segstat.key_bounds = key_bounds;
     {
         std::lock_guard<std::mutex> lock(_segid_statistics_map_mutex);
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp 
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index a5c9fa147b..561da9c3c6 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -728,7 +728,7 @@ uint64_t SegmentWriter::estimate_segment_size() {
     return size;
 }
 
-size_t SegmentWriter::get_inverted_index_file_size() {
+size_t SegmentWriter::try_get_inverted_index_file_size() {
     size_t total_size = 0;
     for (auto& column_writer : _column_writers) {
         total_size += column_writer->get_inverted_index_size();
@@ -769,7 +769,7 @@ Status SegmentWriter::finalize_columns_index(uint64_t* 
index_size) {
         }
         *index_size = _file_writer->bytes_appended() - index_start;
     }
-
+    _inverted_index_file_size = try_get_inverted_index_file_size();
     // reset all column writers and data_conveter
     clear();
 
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h 
b/be/src/olap/rowset/segment_v2/segment_writer.h
index 70de2cf765..31f652190c 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.h
+++ b/be/src/olap/rowset/segment_v2/segment_writer.h
@@ -105,8 +105,9 @@ public:
     int64_t max_row_to_add(size_t row_avg_size_in_bytes);
 
     uint64_t estimate_segment_size();
-    size_t get_inverted_index_file_size();
+    size_t try_get_inverted_index_file_size();
 
+    size_t get_inverted_index_file_size() const { return 
_inverted_index_file_size; }
     uint32_t num_rows_written() const { return _num_rows_written; }
     uint32_t row_count() const { return _row_count; }
 
@@ -177,6 +178,7 @@ private:
     SegmentFooterPB _footer;
     size_t _num_key_columns;
     size_t _num_short_key_columns;
+    size_t _inverted_index_file_size;
     std::unique_ptr<ShortKeyIndexBuilder> _short_key_index_builder;
     std::unique_ptr<PrimaryKeyIndexBuilder> _primary_key_index_builder;
     std::vector<std::unique_ptr<ColumnWriter>> _column_writers;
diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp 
b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
index bf003a713c..6c7cced969 100644
--- a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
@@ -137,7 +137,8 @@ Status VerticalBetaRowsetWriter::_flush_columns(
         _segment_num_rows.resize(_cur_writer_idx + 1);
         _segment_num_rows[_cur_writer_idx] = 
_segment_writers[_cur_writer_idx]->row_count();
     }
-    _total_index_size += static_cast<int64_t>(index_size);
+    _total_index_size +=
+            static_cast<int64_t>(index_size) + 
(*segment_writer)->get_inverted_index_file_size();
     return Status::OK();
 }
 
@@ -203,7 +204,7 @@ Status VerticalBetaRowsetWriter::final_flush() {
             LOG(WARNING) << "Fail to finalize segment footer, " << st;
             return st;
         }
-        _total_data_size += segment_size;
+        _total_data_size += segment_size + 
segment_writer->get_inverted_index_file_size();
         segment_writer.reset();
     }
     return Status::OK();


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to