This is an automated email from the ASF dual-hosted git repository.

gavinchou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new d1b448fd71b [fix](rowset meta) Fix rowset meta size relation (#41022)
d1b448fd71b is described below

commit d1b448fd71b3feaa539dee973f8b230cef2d2b20
Author: abmdocrt <yukang.lian2...@gmail.com>
AuthorDate: Thu Oct 31 19:41:05 2024 +0800

    [fix](rowset meta) Fix rowset meta size relation (#41022)
    
    Issue #40879 step 2
    make rowset total size = rowset data size + rowset index size
---
 be/src/cloud/cloud_base_compaction.cpp             |  23 ++--
 be/src/cloud/cloud_cumulative_compaction.cpp       |  25 ++--
 .../cloud/cloud_cumulative_compaction_policy.cpp   |   2 +-
 be/src/cloud/cloud_full_compaction.cpp             |  26 +++--
 be/src/cloud/cloud_meta_mgr.cpp                    | 123 ++++++++++++++++++++
 be/src/cloud/cloud_meta_mgr.h                      |   3 +
 be/src/cloud/cloud_rowset_builder.cpp              |   2 +-
 be/src/cloud/cloud_schema_change_job.cpp           |   2 +-
 be/src/cloud/cloud_tablet.cpp                      |   2 +-
 be/src/common/config.cpp                           |   2 +
 be/src/common/config.h                             |   2 +
 be/src/olap/base_compaction.cpp                    |   2 +-
 be/src/olap/compaction.cpp                         |  29 +++--
 be/src/olap/compaction.h                           |   5 +-
 be/src/olap/cumulative_compaction.cpp              |   3 +-
 be/src/olap/rowset/beta_rowset.cpp                 |   2 +-
 be/src/olap/rowset/beta_rowset_writer.cpp          |   9 +-
 be/src/olap/rowset/beta_rowset_writer.h            |   1 -
 be/src/olap/rowset/rowset.h                        |   3 +-
 be/src/olap/rowset/rowset_meta.cpp                 |   1 +
 be/src/olap/rowset/segment_creator.cpp             |  32 ++++--
 be/src/olap/rowset/vertical_beta_rowset_writer.cpp |   1 -
 be/src/olap/single_replica_compaction.cpp          |  13 ++-
 be/src/olap/tablet.cpp                             | 126 ++++++++++++++++++++-
 be/src/olap/tablet.h                               |   4 +
 be/src/olap/tablet_meta.h                          |   6 +-
 be/src/olap/task/engine_checksum_task.cpp          |   2 +-
 be/src/olap/task/index_builder.cpp                 |  13 +--
 be/src/runtime/load_stream_writer.cpp              |   2 +-
 cloud/src/common/config.h                          |   1 -
 cloud/src/meta-service/meta_service_txn.cpp        |   2 +-
 cloud/test/meta_service_http_test.cpp              |   6 +-
 cloud/test/meta_service_test.cpp                   |   6 +-
 cloud/test/schema_kv_test.cpp                      |   6 +-
 .../pipeline/cloud_p0/conf/be_custom.conf          |   1 +
 .../pipeline/cloud_p1/conf/be_custom.conf          |   1 +
 regression-test/pipeline/p0/conf/be.conf           |   1 +
 regression-test/pipeline/p1/conf/be.conf           |   1 +
 38 files changed, 398 insertions(+), 93 deletions(-)

diff --git a/be/src/cloud/cloud_base_compaction.cpp 
b/be/src/cloud/cloud_base_compaction.cpp
index f431eaf850b..88d83000e95 100644
--- a/be/src/cloud/cloud_base_compaction.cpp
+++ b/be/src/cloud/cloud_base_compaction.cpp
@@ -124,7 +124,8 @@ Status CloudBaseCompaction::prepare_compact() {
     for (auto& rs : _input_rowsets) {
         _input_row_num += rs->num_rows();
         _input_segments += rs->num_segments();
-        _input_rowsets_size += rs->data_disk_size();
+        _input_rowsets_data_size += rs->data_disk_size();
+        _input_rowsets_total_size += rs->total_disk_size();
     }
     LOG_INFO("start CloudBaseCompaction, tablet_id={}, range=[{}-{}]", 
_tablet->tablet_id(),
              _input_rowsets.front()->start_version(), 
_input_rowsets.back()->end_version())
@@ -132,7 +133,9 @@ Status CloudBaseCompaction::prepare_compact() {
             .tag("input_rowsets", _input_rowsets.size())
             .tag("input_rows", _input_row_num)
             .tag("input_segments", _input_segments)
-            .tag("input_data_size", _input_rowsets_size);
+            .tag("input_rowsets_data_size", _input_rowsets_data_size)
+            .tag("input_rowsets_index_size", _input_rowsets_index_size)
+            .tag("input_rowsets_total_size", _input_rowsets_total_size);
     return st;
 }
 
@@ -270,17 +273,21 @@ Status CloudBaseCompaction::execute_compact() {
             .tag("input_rowsets", _input_rowsets.size())
             .tag("input_rows", _input_row_num)
             .tag("input_segments", _input_segments)
-            .tag("input_data_size", _input_rowsets_size)
+            .tag("input_rowsets_data_size", _input_rowsets_data_size)
+            .tag("input_rowsets_index_size", _input_rowsets_index_size)
+            .tag("input_rowsets_total", _input_rowsets_total_size)
             .tag("output_rows", _output_rowset->num_rows())
             .tag("output_segments", _output_rowset->num_segments())
-            .tag("output_data_size", _output_rowset->data_disk_size());
+            .tag("output_rowset_data_size", _output_rowset->data_disk_size())
+            .tag("output_rowset_index_size", _output_rowset->index_disk_size())
+            .tag("output_rowset_total_size", 
_output_rowset->total_disk_size());
 
     //_compaction_succeed = true;
     _state = CompactionState::SUCCESS;
 
     
DorisMetrics::instance()->base_compaction_deltas_total->increment(_input_rowsets.size());
-    
DorisMetrics::instance()->base_compaction_bytes_total->increment(_input_rowsets_size);
-    base_output_size << _output_rowset->data_disk_size();
+    
DorisMetrics::instance()->base_compaction_bytes_total->increment(_input_rowsets_total_size);
+    base_output_size << _output_rowset->total_disk_size();
 
     return Status::OK();
 }
@@ -302,8 +309,8 @@ Status CloudBaseCompaction::modify_rowsets() {
     
compaction_job->set_output_cumulative_point(cloud_tablet()->cumulative_layer_point());
     compaction_job->set_num_input_rows(_input_row_num);
     compaction_job->set_num_output_rows(_output_rowset->num_rows());
-    compaction_job->set_size_input_rowsets(_input_rowsets_size);
-    compaction_job->set_size_output_rowsets(_output_rowset->data_disk_size());
+    compaction_job->set_size_input_rowsets(_input_rowsets_total_size);
+    compaction_job->set_size_output_rowsets(_output_rowset->total_disk_size());
     compaction_job->set_num_input_segments(_input_segments);
     compaction_job->set_num_output_segments(_output_rowset->num_segments());
     compaction_job->set_num_input_rowsets(_input_rowsets.size());
diff --git a/be/src/cloud/cloud_cumulative_compaction.cpp 
b/be/src/cloud/cloud_cumulative_compaction.cpp
index 7910d94534e..8eb92577693 100644
--- a/be/src/cloud/cloud_cumulative_compaction.cpp
+++ b/be/src/cloud/cloud_cumulative_compaction.cpp
@@ -164,7 +164,9 @@ PREPARE_TRY_AGAIN:
     for (auto& rs : _input_rowsets) {
         _input_row_num += rs->num_rows();
         _input_segments += rs->num_segments();
-        _input_rowsets_size += rs->data_disk_size();
+        _input_rowsets_data_size += rs->data_disk_size();
+        _input_rowsets_index_size += rs->index_disk_size();
+        _input_rowsets_total_size += rs->total_disk_size();
     }
     LOG_INFO("start CloudCumulativeCompaction, tablet_id={}, range=[{}-{}]", 
_tablet->tablet_id(),
              _input_rowsets.front()->start_version(), 
_input_rowsets.back()->end_version())
@@ -172,7 +174,9 @@ PREPARE_TRY_AGAIN:
             .tag("input_rowsets", _input_rowsets.size())
             .tag("input_rows", _input_row_num)
             .tag("input_segments", _input_segments)
-            .tag("input_data_size", _input_rowsets_size)
+            .tag("input_rowsets_data_size", _input_rowsets_data_size)
+            .tag("input_rowsets_index_size", _input_rowsets_index_size)
+            .tag("input_rowsets_total_size", _input_rowsets_total_size)
             .tag("tablet_max_version", cloud_tablet()->max_version_unlocked())
             .tag("cumulative_point", cloud_tablet()->cumulative_layer_point())
             .tag("num_rowsets", 
cloud_tablet()->fetch_add_approximate_num_rowsets(0))
@@ -201,10 +205,14 @@ Status CloudCumulativeCompaction::execute_compact() {
             .tag("input_rowsets", _input_rowsets.size())
             .tag("input_rows", _input_row_num)
             .tag("input_segments", _input_segments)
-            .tag("input_data_size", _input_rowsets_size)
+            .tag("input_rowsets_data_size", _input_rowsets_data_size)
+            .tag("input_rowsets_index_size", _input_rowsets_index_size)
+            .tag("input_rowsets_total_size", _input_rowsets_total_size)
             .tag("output_rows", _output_rowset->num_rows())
             .tag("output_segments", _output_rowset->num_segments())
-            .tag("output_data_size", _output_rowset->data_disk_size())
+            .tag("output_rowset_data_size", _output_rowset->data_disk_size())
+            .tag("output_rowset_index_size", _output_rowset->index_disk_size())
+            .tag("output_rowset_total_size", _output_rowset->total_disk_size())
             .tag("tablet_max_version", _tablet->max_version_unlocked())
             .tag("cumulative_point", cloud_tablet()->cumulative_layer_point())
             .tag("num_rowsets", 
cloud_tablet()->fetch_add_approximate_num_rowsets(0))
@@ -213,8 +221,9 @@ Status CloudCumulativeCompaction::execute_compact() {
     _state = CompactionState::SUCCESS;
 
     
DorisMetrics::instance()->cumulative_compaction_deltas_total->increment(_input_rowsets.size());
-    
DorisMetrics::instance()->cumulative_compaction_bytes_total->increment(_input_rowsets_size);
-    cumu_output_size << _output_rowset->data_disk_size();
+    DorisMetrics::instance()->cumulative_compaction_bytes_total->increment(
+            _input_rowsets_total_size);
+    cumu_output_size << _output_rowset->total_disk_size();
 
     return Status::OK();
 }
@@ -243,8 +252,8 @@ Status CloudCumulativeCompaction::modify_rowsets() {
     compaction_job->set_output_cumulative_point(new_cumulative_point);
     compaction_job->set_num_input_rows(_input_row_num);
     compaction_job->set_num_output_rows(_output_rowset->num_rows());
-    compaction_job->set_size_input_rowsets(_input_rowsets_size);
-    compaction_job->set_size_output_rowsets(_output_rowset->data_disk_size());
+    compaction_job->set_size_input_rowsets(_input_rowsets_total_size);
+    compaction_job->set_size_output_rowsets(_output_rowset->total_disk_size());
     compaction_job->set_num_input_segments(_input_segments);
     compaction_job->set_num_output_segments(_output_rowset->num_segments());
     compaction_job->set_num_input_rowsets(_input_rowsets.size());
diff --git a/be/src/cloud/cloud_cumulative_compaction_policy.cpp 
b/be/src/cloud/cloud_cumulative_compaction_policy.cpp
index f9af469e56f..5a9879387b2 100644
--- a/be/src/cloud/cloud_cumulative_compaction_policy.cpp
+++ b/be/src/cloud/cloud_cumulative_compaction_policy.cpp
@@ -209,7 +209,7 @@ int64_t 
CloudSizeBasedCumulativeCompactionPolicy::new_cumulative_point(
     // if rowsets have no delete version, check output_rowset total disk size 
satisfies promotion size.
     return output_rowset->start_version() == last_cumulative_point &&
                            (last_delete_version.first != -1 ||
-                            output_rowset->data_disk_size() >= 
cloud_promotion_size(tablet) ||
+                            output_rowset->total_disk_size() >= 
cloud_promotion_size(tablet) ||
                             satisfy_promotion_version)
                    ? output_rowset->end_version() + 1
                    : last_cumulative_point;
diff --git a/be/src/cloud/cloud_full_compaction.cpp 
b/be/src/cloud/cloud_full_compaction.cpp
index f22c449223c..c27b728c93d 100644
--- a/be/src/cloud/cloud_full_compaction.cpp
+++ b/be/src/cloud/cloud_full_compaction.cpp
@@ -98,7 +98,9 @@ Status CloudFullCompaction::prepare_compact() {
     for (auto& rs : _input_rowsets) {
         _input_row_num += rs->num_rows();
         _input_segments += rs->num_segments();
-        _input_rowsets_size += rs->data_disk_size();
+        _input_rowsets_data_size += rs->data_disk_size();
+        _input_rowsets_index_size += rs->index_disk_size();
+        _input_rowsets_total_size += rs->total_disk_size();
     }
     LOG_INFO("start CloudFullCompaction, tablet_id={}, range=[{}-{}]", 
_tablet->tablet_id(),
              _input_rowsets.front()->start_version(), 
_input_rowsets.back()->end_version())
@@ -106,7 +108,9 @@ Status CloudFullCompaction::prepare_compact() {
             .tag("input_rowsets", _input_rowsets.size())
             .tag("input_rows", _input_row_num)
             .tag("input_segments", _input_segments)
-            .tag("input_data_size", _input_rowsets_size);
+            .tag("input_rowsets_data_size", _input_rowsets_data_size)
+            .tag("input_rowsets_index_size", _input_rowsets_index_size)
+            .tag("input_rowsets_total_size", _input_rowsets_total_size);
     return st;
 }
 
@@ -162,16 +166,20 @@ Status CloudFullCompaction::execute_compact() {
             .tag("input_rowsets", _input_rowsets.size())
             .tag("input_rows", _input_row_num)
             .tag("input_segments", _input_segments)
-            .tag("input_data_size", _input_rowsets_size)
+            .tag("input_rowsets_data_size", _input_rowsets_data_size)
+            .tag("input_rowsets_index_size", _input_rowsets_index_size)
+            .tag("input_rowsets_total_size", _input_rowsets_total_size)
             .tag("output_rows", _output_rowset->num_rows())
             .tag("output_segments", _output_rowset->num_segments())
-            .tag("output_data_size", _output_rowset->data_disk_size());
+            .tag("output_rowset_data_size", _output_rowset->data_disk_size())
+            .tag("output_rowset_index_size", _output_rowset->index_disk_size())
+            .tag("output_rowset_total_size", 
_output_rowset->total_disk_size());
 
     _state = CompactionState::SUCCESS;
 
     
DorisMetrics::instance()->full_compaction_deltas_total->increment(_input_rowsets.size());
-    
DorisMetrics::instance()->full_compaction_bytes_total->increment(_input_rowsets_size);
-    full_output_size << _output_rowset->data_disk_size();
+    
DorisMetrics::instance()->full_compaction_bytes_total->increment(_input_rowsets_total_size);
+    full_output_size << _output_rowset->total_disk_size();
 
     return Status::OK();
 }
@@ -193,8 +201,8 @@ Status CloudFullCompaction::modify_rowsets() {
     compaction_job->set_output_cumulative_point(_output_rowset->end_version() 
+ 1);
     compaction_job->set_num_input_rows(_input_row_num);
     compaction_job->set_num_output_rows(_output_rowset->num_rows());
-    compaction_job->set_size_input_rowsets(_input_rowsets_size);
-    compaction_job->set_size_output_rowsets(_output_rowset->data_disk_size());
+    compaction_job->set_size_input_rowsets(_input_rowsets_total_size);
+    compaction_job->set_size_output_rowsets(_output_rowset->total_disk_size());
     
DBUG_EXECUTE_IF("CloudFullCompaction::modify_rowsets.wrong_compaction_data_size",
 {
         compaction_job->set_size_input_rowsets(1);
         compaction_job->set_size_output_rowsets(10000001);
@@ -345,7 +353,7 @@ Status 
CloudFullCompaction::_cloud_full_compaction_update_delete_bitmap(int64_t
             .tag("input_rowsets", _input_rowsets.size())
             .tag("input_rows", _input_row_num)
             .tag("input_segments", _input_segments)
-            .tag("input_data_size", _input_rowsets_size)
+            .tag("input_rowsets_total_size", _input_rowsets_total_size)
             .tag("update_bitmap_size", delete_bitmap->delete_bitmap.size());
     _tablet->tablet_meta()->delete_bitmap().merge(*delete_bitmap);
     return Status::OK();
diff --git a/be/src/cloud/cloud_meta_mgr.cpp b/be/src/cloud/cloud_meta_mgr.cpp
index 7dc9a4f11a1..57f3c7f8009 100644
--- a/be/src/cloud/cloud_meta_mgr.cpp
+++ b/be/src/cloud/cloud_meta_mgr.cpp
@@ -27,6 +27,7 @@
 #include <algorithm>
 #include <atomic>
 #include <chrono>
+#include <cstdint>
 #include <memory>
 #include <mutex>
 #include <random>
@@ -51,6 +52,7 @@
 #include "olap/olap_common.h"
 #include "olap/rowset/rowset.h"
 #include "olap/rowset/rowset_factory.h"
+#include "olap/rowset/rowset_fwd.h"
 #include "olap/storage_engine.h"
 #include "olap/tablet_meta.h"
 #include "runtime/client_cache.h"
@@ -750,6 +752,7 @@ Status CloudMetaMgr::commit_rowset(const RowsetMeta& 
rs_meta,
         Status ret_st;
         TEST_INJECTION_POINT_RETURN_WITH_VALUE("CloudMetaMgr::commit_rowset", 
ret_st);
     }
+    check_table_size_correctness(rs_meta);
     CreateRowsetRequest req;
     CreateRowsetResponse resp;
     req.set_cloud_unique_id(config::cloud_unique_id);
@@ -1125,4 +1128,124 @@ Status CloudMetaMgr::remove_old_version_delete_bitmap(
     return st;
 }
 
+// Cross-checks the sizes recorded in `rs_meta` against the sizes reported for the rowset's
+// segment and inverted index files, and against the relation
+// total_disk_size == data_disk_size + index_disk_size.
+// Does nothing unless config::enable_table_size_correctness_check is set. On any mismatch a
+// WARNING listing every involved size is logged and DCHECK(false) is raised.
+void CloudMetaMgr::check_table_size_correctness(const RowsetMeta& rs_meta) {
+    if (!config::enable_table_size_correctness_check) {
+        return;
+    }
+    const int64_t real_data_size = get_segment_file_size(rs_meta);
+    const int64_t real_index_size = get_inverted_index_file_szie(rs_meta);
+    // Guard clause: recorded sizes agree with the files and with each other.
+    if (rs_meta.data_disk_size() == real_data_size &&
+        rs_meta.index_disk_size() == real_index_size &&
+        rs_meta.data_disk_size() + rs_meta.index_disk_size() == rs_meta.total_disk_size()) {
+        return;
+    }
+    // The path of segment 0 is reported so the offending rowset can be located.
+    const std::string first_segment_path = StorageResource().remote_segment_path(
+            rs_meta.tablet_id(), rs_meta.rowset_id().to_string(), 0);
+    LOG(WARNING) << "[Cloud table table size check failed]:"
+                 << " tablet id: " << rs_meta.tablet_id()
+                 << ", rowset id:" << rs_meta.rowset_id()
+                 << ", rowset data disk size:" << rs_meta.data_disk_size()
+                 << ", rowset real data disk size:" << real_data_size
+                 << ", rowset index disk size:" << rs_meta.index_disk_size()
+                 << ", rowset real index disk size:" << real_index_size
+                 << ", rowset total disk size:" << rs_meta.total_disk_size()
+                 << ", rowset segment path:" << first_segment_path;
+    DCHECK(false);
+}
+
+// Returns the sum of the sizes of all segment files of the rowset described by `rs_meta`,
+// as reported by the rowset's file system.
+//
+// A segment whose size cannot be obtained contributes 0 to the sum; the failure is logged
+// at INFO when the file does not exist and at WARNING otherwise. If the rowset has no file
+// system, a WARNING is logged and 0 is returned.
+int64_t CloudMetaMgr::get_segment_file_size(const RowsetMeta& rs_meta) {
+    const auto fs = const_cast<RowsetMeta&>(rs_meta).fs();
+    if (!fs) {
+        // No file system means no size can be queried; return instead of dereferencing null.
+        LOG(WARNING) << "get fs failed, resource_id=" << rs_meta.resource_id();
+        return 0;
+    }
+    // The rowset id string is the same for every segment, so build it once.
+    const std::string rowset_id = rs_meta.rowset_id().to_string();
+    int64_t total_segment_size = 0;
+    for (int64_t seg_id = 0; seg_id < rs_meta.num_segments(); seg_id++) {
+        std::string segment_path =
+                StorageResource().remote_segment_path(rs_meta.tablet_id(), rowset_id, seg_id);
+        int64_t segment_file_size = 0;
+        auto st = fs->file_size(segment_path, &segment_file_size);
+        if (!st.ok()) {
+            // Do not trust whatever a failed call left in the out-parameter.
+            segment_file_size = 0;
+            if (st.is<FILE_NOT_EXIST>()) {
+                LOG(INFO) << "cloud table size correctness check get segment size 0 because "
+                             "file not exist! msg:"
+                          << st.msg() << ", segment path:" << segment_path;
+            } else {
+                LOG(WARNING) << "cloud table size correctness check get segment size failed! msg:"
+                             << st.msg() << ", segment path:" << segment_path;
+            }
+        }
+        total_segment_size += segment_file_size;
+    }
+    return total_segment_size;
+}
+
+// Returns the sum of the sizes of all inverted index files of the rowset described by
+// `rs_meta`, as reported by the rowset's file system.
+//
+// For storage format V1 one file is looked up per (inverted index, segment) pair; for any
+// other format one file is looked up per segment. A file whose size cannot be obtained
+// contributes 0 to the sum; the failure is logged at INFO when the file does not exist and
+// at WARNING otherwise. If the rowset has no file system, a WARNING is logged and 0 is
+// returned.
+int64_t CloudMetaMgr::get_inverted_index_file_szie(const RowsetMeta& rs_meta) {
+    const auto fs = const_cast<RowsetMeta&>(rs_meta).fs();
+    if (!fs) {
+        // No file system means no size can be queried; return instead of dereferencing null.
+        LOG(WARNING) << "get fs failed, resource_id=" << rs_meta.resource_id();
+        return 0;
+    }
+    // The rowset id string is the same for every segment, so build it once.
+    const std::string rowset_id = rs_meta.rowset_id().to_string();
+    // Keep the schema handle in a local so the index list below stays valid while iterating.
+    const auto schema = rs_meta.tablet_schema();
+    int64_t total_inverted_index_size = 0;
+    if (schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) {
+        // Bind by reference to avoid copying the index container when a reference is returned.
+        const auto& indices = schema->indexes();
+        for (const auto& index : indices) {
+            // only get file_size for inverted index
+            if (index.index_type() != IndexType::INVERTED) {
+                continue;
+            }
+            for (int seg_id = 0; seg_id < rs_meta.num_segments(); ++seg_id) {
+                std::string segment_path = StorageResource().remote_segment_path(
+                        rs_meta.tablet_id(), rowset_id, seg_id);
+                int64_t file_size = 0;
+
+                std::string inverted_index_file_path =
+                        InvertedIndexDescriptor::get_index_file_path_v1(
+                                InvertedIndexDescriptor::get_index_file_path_prefix(segment_path),
+                                index.index_id(), index.get_index_suffix());
+                auto st = fs->file_size(inverted_index_file_path, &file_size);
+                if (!st.ok()) {
+                    // Do not trust whatever a failed call left in the out-parameter.
+                    file_size = 0;
+                    if (st.is<FILE_NOT_EXIST>()) {
+                        LOG(INFO) << "cloud table size correctness check get inverted index v1 "
+                                     "0 because file not exist! msg:"
+                                  << st.msg()
+                                  << ", inverted index path:" << inverted_index_file_path;
+                    } else {
+                        LOG(WARNING)
+                                << "cloud table size correctness check get inverted index v1 "
+                                   "size failed! msg:"
+                                << st.msg() << ", inverted index path:" << inverted_index_file_path;
+                    }
+                }
+                total_inverted_index_size += file_size;
+            }
+        }
+    } else {
+        for (int seg_id = 0; seg_id < rs_meta.num_segments(); ++seg_id) {
+            int64_t file_size = 0;
+            std::string segment_path =
+                    StorageResource().remote_segment_path(rs_meta.tablet_id(), rowset_id, seg_id);
+
+            std::string inverted_index_file_path = InvertedIndexDescriptor::get_index_file_path_v2(
+                    InvertedIndexDescriptor::get_index_file_path_prefix(segment_path));
+            auto st = fs->file_size(inverted_index_file_path, &file_size);
+            if (!st.ok()) {
+                // Do not trust whatever a failed call left in the out-parameter.
+                file_size = 0;
+                if (st.is<FILE_NOT_EXIST>()) {
+                    LOG(INFO) << "cloud table size correctness check get inverted index v2 "
+                                 "0 because file not exist! msg:"
+                              << st.msg() << ", inverted index path:" << inverted_index_file_path;
+                } else {
+                    LOG(WARNING) << "cloud table size correctness check get inverted index v2 "
+                                    "size failed! msg:"
+                                 << st.msg()
+                                 << ", inverted index path:" << inverted_index_file_path;
+                }
+            }
+            total_inverted_index_size += file_size;
+        }
+    }
+    return total_inverted_index_size;
+}
+
 } // namespace doris::cloud
diff --git a/be/src/cloud/cloud_meta_mgr.h b/be/src/cloud/cloud_meta_mgr.h
index 79cdb3fd3d1..a48381f056e 100644
--- a/be/src/cloud/cloud_meta_mgr.h
+++ b/be/src/cloud/cloud_meta_mgr.h
@@ -113,6 +113,9 @@ private:
     Status sync_tablet_delete_bitmap(CloudTablet* tablet, int64_t 
old_max_version,
                                      std::ranges::range auto&& rs_metas, const 
TabletStatsPB& stats,
                                      const TabletIndexPB& idx, DeleteBitmap* 
delete_bitmap);
+    void check_table_size_correctness(const RowsetMeta& rs_meta);
+    int64_t get_segment_file_size(const RowsetMeta& rs_meta);
+    int64_t get_inverted_index_file_szie(const RowsetMeta& rs_meta);
 };
 
 } // namespace cloud
diff --git a/be/src/cloud/cloud_rowset_builder.cpp 
b/be/src/cloud/cloud_rowset_builder.cpp
index 192da0f17ef..2e6764b33aa 100644
--- a/be/src/cloud/cloud_rowset_builder.cpp
+++ b/be/src/cloud/cloud_rowset_builder.cpp
@@ -106,7 +106,7 @@ void CloudRowsetBuilder::update_tablet_stats() {
     tablet->fetch_add_approximate_num_rowsets(1);
     tablet->fetch_add_approximate_num_segments(_rowset->num_segments());
     tablet->fetch_add_approximate_num_rows(_rowset->num_rows());
-    tablet->fetch_add_approximate_data_size(_rowset->data_disk_size());
+    tablet->fetch_add_approximate_data_size(_rowset->total_disk_size());
     tablet->fetch_add_approximate_cumu_num_rowsets(1);
     tablet->fetch_add_approximate_cumu_num_deltas(_rowset->num_segments());
     tablet->write_count.fetch_add(1, std::memory_order_relaxed);
diff --git a/be/src/cloud/cloud_schema_change_job.cpp 
b/be/src/cloud/cloud_schema_change_job.cpp
index b7e3be93e85..896804578d7 100644
--- a/be/src/cloud/cloud_schema_change_job.cpp
+++ b/be/src/cloud/cloud_schema_change_job.cpp
@@ -344,7 +344,7 @@ Status 
CloudSchemaChangeJob::_convert_historical_rowsets(const SchemaChangeParam
             sc_job->add_txn_ids(rs->txn_id());
             sc_job->add_output_versions(rs->end_version());
             num_output_rows += rs->num_rows();
-            size_output_rowsets += rs->data_disk_size();
+            size_output_rowsets += rs->total_disk_size();
             num_output_segments += rs->num_segments();
         }
         sc_job->set_num_output_rows(num_output_rows);
diff --git a/be/src/cloud/cloud_tablet.cpp b/be/src/cloud/cloud_tablet.cpp
index b944db87030..d3b131d055d 100644
--- a/be/src/cloud/cloud_tablet.cpp
+++ b/be/src/cloud/cloud_tablet.cpp
@@ -412,7 +412,7 @@ int CloudTablet::delete_expired_stale_rowsets() {
 void CloudTablet::update_base_size(const Rowset& rs) {
     // Define base rowset as the rowset of version [2-x]
     if (rs.start_version() == 2) {
-        _base_size = rs.data_disk_size();
+        _base_size = rs.total_disk_size();
     }
 }
 
diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 2fdebbd09c2..d8308c7eb97 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1356,6 +1356,8 @@ DEFINE_mInt32(check_score_rounds_num, "1000");
 DEFINE_Int32(query_cache_size, "512");
 
 DEFINE_mBool(enable_delete_bitmap_merge_on_compaction, "false");
+// Enable validation to check the correctness of table size.
+DEFINE_Bool(enable_table_size_correctness_check, "false");
 
 // clang-format off
 #ifdef BE_TEST
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 791ca0b5e1a..f827e0f7dae 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1443,6 +1443,8 @@ DECLARE_mInt32(check_score_rounds_num);
 DECLARE_Int32(query_cache_size);
 
 DECLARE_mBool(enable_delete_bitmap_merge_on_compaction);
+// Enable validation to check the correctness of table size.
+DECLARE_Bool(enable_table_size_correctness_check);
 
 #ifdef BE_TEST
 // test s3
diff --git a/be/src/olap/base_compaction.cpp b/be/src/olap/base_compaction.cpp
index 8be29383c1e..8b9cbd75ed3 100644
--- a/be/src/olap/base_compaction.cpp
+++ b/be/src/olap/base_compaction.cpp
@@ -80,7 +80,7 @@ Status BaseCompaction::execute_compact() {
 
     tablet()->set_last_base_compaction_success_time(UnixMillis());
     
DorisMetrics::instance()->base_compaction_deltas_total->increment(_input_rowsets.size());
-    
DorisMetrics::instance()->base_compaction_bytes_total->increment(_input_rowsets_size);
+    
DorisMetrics::instance()->base_compaction_bytes_total->increment(_input_rowsets_total_size);
 
     return Status::OK();
 }
diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index 14769bc315a..a76a5d8679d 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -257,10 +257,10 @@ int64_t Compaction::get_avg_segment_rows() {
     if (meta->compaction_policy() == CUMULATIVE_TIME_SERIES_POLICY) {
         int64_t compaction_goal_size_mbytes = 
meta->time_series_compaction_goal_size_mbytes();
         return (compaction_goal_size_mbytes * 1024 * 1024 * 2) /
-               (_input_rowsets_size / (_input_row_num + 1) + 1);
+               (_input_rowsets_data_size / (_input_row_num + 1) + 1);
     }
     return config::vertical_compaction_max_segment_size /
-           (_input_rowsets_size / (_input_row_num + 1) + 1);
+           (_input_rowsets_data_size / (_input_row_num + 1) + 1);
 }
 
 CompactionMixin::CompactionMixin(StorageEngine& engine, TabletSharedPtr tablet,
@@ -305,9 +305,9 @@ Status CompactionMixin::do_compact_ordered_rowsets() {
     // build output rowset
     RowsetMetaSharedPtr rowset_meta = std::make_shared<RowsetMeta>();
     rowset_meta->set_num_rows(_input_row_num);
-    rowset_meta->set_total_disk_size(_input_rowsets_size);
-    rowset_meta->set_data_disk_size(_input_rowsets_size);
-    rowset_meta->set_index_disk_size(_input_index_size);
+    rowset_meta->set_total_disk_size(_input_rowsets_data_size + 
_input_rowsets_index_size);
+    rowset_meta->set_data_disk_size(_input_rowsets_data_size);
+    rowset_meta->set_index_disk_size(_input_rowsets_index_size);
     rowset_meta->set_empty(_input_row_num == 0);
     rowset_meta->set_num_segments(_input_num_segments);
     rowset_meta->set_segments_overlap(NONOVERLAPPING);
@@ -320,12 +320,13 @@ Status CompactionMixin::do_compact_ordered_rowsets() {
 
 void CompactionMixin::build_basic_info() {
     for (auto& rowset : _input_rowsets) {
-        _input_rowsets_size += rowset->data_disk_size();
-        _input_index_size += rowset->index_disk_size();
+        _input_rowsets_data_size += rowset->data_disk_size();
+        _input_rowsets_index_size += rowset->index_disk_size();
+        _input_rowsets_total_size += rowset->total_disk_size();
         _input_row_num += rowset->num_rows();
         _input_num_segments += rowset->num_segments();
     }
-    COUNTER_UPDATE(_input_rowsets_data_size_counter, _input_rowsets_size);
+    COUNTER_UPDATE(_input_rowsets_data_size_counter, _input_rowsets_data_size);
     COUNTER_UPDATE(_input_row_num_counter, _input_row_num);
     COUNTER_UPDATE(_input_segments_num_counter, _input_num_segments);
 
@@ -444,8 +445,12 @@ Status CompactionMixin::execute_compact_impl(int64_t 
permits) {
                   << ", disk=" << tablet()->data_dir()->path()
                   << ", segments=" << _input_num_segments << ", 
input_row_num=" << _input_row_num
                   << ", output_row_num=" << _output_rowset->num_rows()
-                  << ", input_rowset_size=" << _input_rowsets_size
-                  << ", output_rowset_size=" << 
_output_rowset->data_disk_size()
+                  << ", input_rowsets_data_size=" << _input_rowsets_data_size
+                  << ", input_rowsets_index_size=" << _input_rowsets_index_size
+                  << ", input_rowsets_total_size=" << _input_rowsets_total_size
+                  << ", output_rowset_data_size=" << 
_output_rowset->data_disk_size()
+                  << ", output_rowset_index_size=" << 
_output_rowset->index_disk_size()
+                  << ", output_rowset_total_size=" << 
_output_rowset->total_disk_size()
                   << ". elapsed time=" << watch.get_elapse_second() << "s.";
         _state = CompactionState::SUCCESS;
         return Status::OK();
@@ -467,8 +472,8 @@ Status CompactionMixin::execute_compact_impl(int64_t 
permits) {
               << ". tablet=" << _tablet->tablet_id() << ", output_version=" << 
_output_version
               << ", current_max_version=" << tablet()->max_version().second
               << ", disk=" << tablet()->data_dir()->path() << ", segments=" << 
_input_num_segments
-              << ", input_rowset_size=" << _input_rowsets_size
-              << ", output_rowset_size=" << _output_rowset->data_disk_size()
+              << ", input_data_size=" << _input_rowsets_data_size
+              << ", output_rowset_size=" << _output_rowset->total_disk_size()
               << ", input_row_num=" << _input_row_num
               << ", output_row_num=" << _output_rowset->num_rows()
               << ", filtered_row_num=" << _stats.filtered_rows
diff --git a/be/src/olap/compaction.h b/be/src/olap/compaction.h
index 13a37beca19..06ef4268529 100644
--- a/be/src/olap/compaction.h
+++ b/be/src/olap/compaction.h
@@ -90,10 +90,11 @@ protected:
     BaseTabletSPtr _tablet;
 
     std::vector<RowsetSharedPtr> _input_rowsets;
-    int64_t _input_rowsets_size {0};
+    int64_t _input_rowsets_data_size {0};
+    int64_t _input_rowsets_index_size {0};
+    int64_t _input_rowsets_total_size {0};
     int64_t _input_row_num {0};
     int64_t _input_num_segments {0};
-    int64_t _input_index_size {0};
 
     Merger::Statistics _stats;
 
diff --git a/be/src/olap/cumulative_compaction.cpp 
b/be/src/olap/cumulative_compaction.cpp
index b762468b345..b961c694ede 100644
--- a/be/src/olap/cumulative_compaction.cpp
+++ b/be/src/olap/cumulative_compaction.cpp
@@ -125,7 +125,8 @@ Status CumulativeCompaction::execute_compact() {
         tablet()->set_last_cumu_compaction_success_time(UnixMillis());
     }
     
DorisMetrics::instance()->cumulative_compaction_deltas_total->increment(_input_rowsets.size());
-    
DorisMetrics::instance()->cumulative_compaction_bytes_total->increment(_input_rowsets_size);
+    DorisMetrics::instance()->cumulative_compaction_bytes_total->increment(
+            _input_rowsets_total_size);
 
     return Status::OK();
 }
diff --git a/be/src/olap/rowset/beta_rowset.cpp 
b/be/src/olap/rowset/beta_rowset.cpp
index ee1605a3043..4b51dcc3530 100644
--- a/be/src/olap/rowset/beta_rowset.cpp
+++ b/be/src/olap/rowset/beta_rowset.cpp
@@ -498,7 +498,7 @@ Status BetaRowset::upload_to(const StorageResource& 
dest_fs, const RowsetId& new
     auto st = dest_fs.fs->batch_upload(local_paths, dest_paths);
     if (st.ok()) {
         DorisMetrics::instance()->upload_rowset_count->increment(1);
-        
DorisMetrics::instance()->upload_total_byte->increment(data_disk_size());
+        
DorisMetrics::instance()->upload_total_byte->increment(total_disk_size());
     } else {
         DorisMetrics::instance()->upload_fail_count->increment(1);
     }
diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp 
b/be/src/olap/rowset/beta_rowset_writer.cpp
index 3f60e7c5674..548b1950b81 100644
--- a/be/src/olap/rowset/beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/beta_rowset_writer.cpp
@@ -81,7 +81,7 @@ void build_rowset_meta_with_spec_field(RowsetMeta& 
rowset_meta,
                                        const RowsetMeta& spec_rowset_meta) {
     rowset_meta.set_num_rows(spec_rowset_meta.num_rows());
     rowset_meta.set_total_disk_size(spec_rowset_meta.total_disk_size());
-    rowset_meta.set_data_disk_size(spec_rowset_meta.total_disk_size());
+    rowset_meta.set_data_disk_size(spec_rowset_meta.data_disk_size());
     rowset_meta.set_index_disk_size(spec_rowset_meta.index_disk_size());
     // TODO write zonemap to meta
     rowset_meta.set_empty(spec_rowset_meta.num_rows() == 0);
@@ -886,7 +886,8 @@ Status BaseBetaRowsetWriter::_build_rowset_meta(RowsetMeta* 
rowset_meta, bool ch
 
     rowset_meta->set_num_segments(segment_num);
     rowset_meta->set_num_rows(num_rows_written + _num_rows_written);
-    rowset_meta->set_total_disk_size(total_data_size + _total_data_size);
+    rowset_meta->set_total_disk_size(total_data_size + _total_data_size + 
total_index_size +
+                                     _total_index_size);
     rowset_meta->set_data_disk_size(total_data_size + _total_data_size);
     rowset_meta->set_index_disk_size(total_index_size + _total_index_size);
     rowset_meta->set_segments_key_bounds(segments_encoded_key_bounds);
@@ -1089,8 +1090,8 @@ Status 
BetaRowsetWriter::flush_segment_writer_for_segcompaction(
 
     SegmentStatistics segstat;
     segstat.row_num = row_num;
-    segstat.data_size = segment_size + inverted_index_file_size;
-    segstat.index_size = index_size + inverted_index_file_size;
+    segstat.data_size = segment_size;
+    segstat.index_size = inverted_index_file_size;
     segstat.key_bounds = key_bounds;
     {
         std::lock_guard<std::mutex> lock(_segid_statistics_map_mutex);
diff --git a/be/src/olap/rowset/beta_rowset_writer.h 
b/be/src/olap/rowset/beta_rowset_writer.h
index 47e12a531e9..4539959fab5 100644
--- a/be/src/olap/rowset/beta_rowset_writer.h
+++ b/be/src/olap/rowset/beta_rowset_writer.h
@@ -223,7 +223,6 @@ protected:
         RETURN_NOT_OK_STATUS_WITH_WARN(_idx_files.close(),
                                        "failed to close index file when build 
new rowset");
         this->_total_index_size += _idx_files.get_total_index_size();
-        this->_total_data_size += _idx_files.get_total_index_size();
         return Status::OK();
     }
 
diff --git a/be/src/olap/rowset/rowset.h b/be/src/olap/rowset/rowset.h
index 24e660cd2f7..e1a2347f6ae 100644
--- a/be/src/olap/rowset/rowset.h
+++ b/be/src/olap/rowset/rowset.h
@@ -149,7 +149,8 @@ public:
     int64_t start_version() const { return rowset_meta()->version().first; }
     int64_t end_version() const { return rowset_meta()->version().second; }
     size_t index_disk_size() const { return rowset_meta()->index_disk_size(); }
-    size_t data_disk_size() const { return rowset_meta()->total_disk_size(); }
+    size_t data_disk_size() const { return rowset_meta()->data_disk_size(); }
+    size_t total_disk_size() const { return rowset_meta()->total_disk_size(); }
     bool empty() const { return rowset_meta()->empty(); }
     bool zero_num_rows() const { return rowset_meta()->num_rows() == 0; }
     size_t num_rows() const { return rowset_meta()->num_rows(); }
diff --git a/be/src/olap/rowset/rowset_meta.cpp 
b/be/src/olap/rowset/rowset_meta.cpp
index 1571105fa73..6bed5e800ed 100644
--- a/be/src/olap/rowset/rowset_meta.cpp
+++ b/be/src/olap/rowset/rowset_meta.cpp
@@ -226,6 +226,7 @@ void RowsetMeta::merge_rowset_meta(const RowsetMeta& other) 
{
     set_data_disk_size(data_disk_size() + other.data_disk_size());
     set_total_disk_size(total_disk_size() + other.total_disk_size());
     set_index_disk_size(index_disk_size() + other.index_disk_size());
+    set_total_disk_size(data_disk_size() + index_disk_size());
     for (auto&& key_bound : other.get_segments_key_bounds()) {
         add_segment_key_bounds(key_bound);
     }
diff --git a/be/src/olap/rowset/segment_creator.cpp 
b/be/src/olap/rowset/segment_creator.cpp
index 5f4a3dce7b8..e0eb7534123 100644
--- a/be/src/olap/rowset/segment_creator.cpp
+++ b/be/src/olap/rowset/segment_creator.cpp
@@ -225,9 +225,9 @@ Status SegmentFlusher::_flush_segment_writer(
     if (row_num == 0) {
         return Status::OK();
     }
-    uint64_t segment_size;
-    uint64_t index_size;
-    Status s = writer->finalize(&segment_size, &index_size);
+    uint64_t segment_file_size;
+    uint64_t common_index_size;
+    Status s = writer->finalize(&segment_file_size, &common_index_size);
     if (!s.ok()) {
         return Status::Error(s.code(), "failed to finalize segment: {}", 
s.to_string());
     }
@@ -249,16 +249,20 @@ Status SegmentFlusher::_flush_segment_writer(
     uint32_t segment_id = writer->segment_id();
     SegmentStatistics segstat;
     segstat.row_num = row_num;
-    segstat.data_size = segment_size + inverted_index_file_size;
-    segstat.index_size = index_size + inverted_index_file_size;
+    segstat.data_size = segment_file_size;
+    segstat.index_size = inverted_index_file_size;
     segstat.key_bounds = key_bounds;
+    LOG(INFO) << "tablet_id:" << _context.tablet_id
+              << ", flushing rowset_dir: " << _context.tablet_path
+              << ", rowset_id:" << _context.rowset_id << ", data size:" << 
segstat.data_size
+              << ", index size:" << segstat.index_size;
 
     writer.reset();
 
     RETURN_IF_ERROR(_context.segment_collector->add(segment_id, segstat, 
flush_schema));
 
     if (flush_size) {
-        *flush_size = segment_size + inverted_index_file_size;
+        *flush_size = segment_file_size;
     }
     return Status::OK();
 }
@@ -274,9 +278,9 @@ Status 
SegmentFlusher::_flush_segment_writer(std::unique_ptr<segment_v2::Segment
     if (row_num == 0) {
         return Status::OK();
     }
-    uint64_t segment_size;
-    uint64_t index_size;
-    Status s = writer->finalize(&segment_size, &index_size);
+    uint64_t segment_file_size;
+    uint64_t common_index_size;
+    Status s = writer->finalize(&segment_file_size, &common_index_size);
     if (!s.ok()) {
         return Status::Error(s.code(), "failed to finalize segment: {}", 
s.to_string());
     }
@@ -298,16 +302,20 @@ Status 
SegmentFlusher::_flush_segment_writer(std::unique_ptr<segment_v2::Segment
     uint32_t segment_id = writer->get_segment_id();
     SegmentStatistics segstat;
     segstat.row_num = row_num;
-    segstat.data_size = segment_size + inverted_index_file_size;
-    segstat.index_size = index_size + inverted_index_file_size;
+    segstat.data_size = segment_file_size;
+    segstat.index_size = inverted_index_file_size;
     segstat.key_bounds = key_bounds;
+    LOG(INFO) << "tablet_id:" << _context.tablet_id
+              << ", flushing rowset_dir: " << _context.tablet_path
+              << ", rowset_id:" << _context.rowset_id << ", data size:" << 
segstat.data_size
+              << ", index size:" << segstat.index_size;
 
     writer.reset();
 
     RETURN_IF_ERROR(_context.segment_collector->add(segment_id, segstat, 
flush_schema));
 
     if (flush_size) {
-        *flush_size = segment_size + inverted_index_file_size;
+        *flush_size = segment_file_size;
     }
     return Status::OK();
 }
diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp 
b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
index fb8f6622685..46070f8dccd 100644
--- a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
@@ -138,7 +138,6 @@ Status 
VerticalBetaRowsetWriter<T>::_flush_columns(segment_v2::SegmentWriter* se
         this->_segment_num_rows.resize(_cur_writer_idx + 1);
         this->_segment_num_rows[_cur_writer_idx] = 
_segment_writers[_cur_writer_idx]->row_count();
     }
-    this->_total_index_size += static_cast<int64_t>(index_size);
     return Status::OK();
 }
 
diff --git a/be/src/olap/single_replica_compaction.cpp 
b/be/src/olap/single_replica_compaction.cpp
index ef93ab25cae..7470afe0ef6 100644
--- a/be/src/olap/single_replica_compaction.cpp
+++ b/be/src/olap/single_replica_compaction.cpp
@@ -149,11 +149,15 @@ Status 
SingleReplicaCompaction::_do_single_replica_compaction_impl() {
     LOG(INFO) << "succeed to do single replica compaction"
               << ". tablet=" << _tablet->tablet_id() << ", output_version=" << 
_output_version
               << ", current_max_version=" << current_max_version
-              << ", input_rowset_size=" << _input_rowsets_size
+              << ", input_rowsets_data_size=" << _input_rowsets_data_size
+              << ", input_rowsets_index_size=" << _input_rowsets_index_size
+              << ", input_rowsets_total_size=" << _input_rowsets_total_size
               << ", input_row_num=" << _input_row_num
               << ", input_segments_num=" << _input_num_segments
-              << ", _input_index_size=" << _input_index_size
+              << ", _input_index_size=" << _input_rowsets_index_size
               << ", output_rowset_data_size=" << 
_output_rowset->data_disk_size()
+              << ", output_rowset_index_size=" << 
_output_rowset->index_disk_size()
+              << ", output_rowset_total_size=" << 
_output_rowset->total_disk_size()
               << ", output_row_num=" << _output_rowset->num_rows()
               << ", output_segments_num=" << _output_rowset->num_segments();
     return Status::OK();
@@ -264,10 +268,11 @@ bool SingleReplicaCompaction::_find_rowset_to_fetch(const 
std::vector<Version>&
             return false;
         }
         for (auto& rowset : _input_rowsets) {
-            _input_rowsets_size += rowset->data_disk_size();
+            _input_rowsets_data_size += rowset->data_disk_size();
             _input_row_num += rowset->num_rows();
             _input_num_segments += rowset->num_segments();
-            _input_index_size += rowset->index_disk_size();
+            _input_rowsets_index_size += rowset->index_disk_size();
+            _input_rowsets_total_size += rowset->data_disk_size() + 
rowset->index_disk_size();
         }
         _output_version = *proper_version;
     }
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 450f3d2cb8b..7c69ba54831 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -26,6 +26,7 @@
 #include <gen_cpp/Metrics_types.h>
 #include <gen_cpp/olap_file.pb.h>
 #include <gen_cpp/types.pb.h>
+#include <glog/logging.h>
 #include <rapidjson/document.h>
 #include <rapidjson/encodings.h>
 #include <rapidjson/prettywriter.h>
@@ -35,6 +36,7 @@
 #include <algorithm>
 #include <atomic>
 #include <boost/container/detail/std_fwd.hpp>
+#include <cstdint>
 #include <roaring/roaring.hh>
 
 #include "common/compiler_util.h" // IWYU pragma: keep
@@ -86,6 +88,7 @@
 #include "olap/rowset/beta_rowset.h"
 #include "olap/rowset/rowset.h"
 #include "olap/rowset/rowset_factory.h"
+#include "olap/rowset/rowset_fwd.h"
 #include "olap/rowset/rowset_meta.h"
 #include "olap/rowset/rowset_meta_manager.h"
 #include "olap/rowset/rowset_writer.h"
@@ -329,6 +332,7 @@ Status Tablet::init() {
 // should save tablet meta to remote meta store
 // if it's a primary replica
 void Tablet::save_meta() {
+    check_table_size_correctness();
     auto res = _tablet_meta->save_meta(_data_dir);
     CHECK_EQ(res, Status::OK()) << "fail to save tablet_meta. res=" << res
                                 << ", root=" << _data_dir->path();
@@ -2047,8 +2051,8 @@ Status Tablet::_cooldown_data(RowsetSharedPtr rowset) {
     LOG(INFO) << "Upload rowset " << old_rowset->version() << " " << 
new_rowset_id.to_string()
               << " to " << storage_resource.fs->root_path().native()
               << ", tablet_id=" << tablet_id() << ", duration=" << 
duration.count()
-              << ", capacity=" << old_rowset->data_disk_size()
-              << ", tp=" << old_rowset->data_disk_size() / duration.count()
+              << ", capacity=" << old_rowset->total_disk_size()
+              << ", tp=" << old_rowset->total_disk_size() / duration.count()
               << ", old rowset_id=" << old_rowset->rowset_id().to_string();
 
     // gen a new rowset
@@ -2427,7 +2431,7 @@ RowsetSharedPtr Tablet::need_cooldown(int64_t* 
cooldown_timestamp, size_t* file_
     // current time or it's datatime is less than current time
     if (newest_cooldown_time != 0 && newest_cooldown_time < UnixSeconds()) {
         *cooldown_timestamp = newest_cooldown_time;
-        *file_size = rowset->data_disk_size();
+        *file_size = rowset->total_disk_size();
         VLOG_DEBUG << "tablet need cooldown, tablet id: " << tablet_id()
                    << " file_size: " << *file_size;
         return rowset;
@@ -2737,4 +2741,120 @@ void Tablet::clear_cache() {
     }
 }
 
+// Consistency check over the rowset metas of this tablet; a no-op unless
+// config::enable_table_size_correctness_check is set.
+//
+// For every rowset meta it verifies that
+//   * data_disk_size  equals the summed size of the rowset's segment files,
+//   * index_disk_size equals the summed size of its inverted index files,
+//   * total_disk_size equals data_disk_size + index_disk_size.
+// A violation is logged as a warning and then trips DCHECK(false).
+void Tablet::check_table_size_correctness() {
+    if (!config::enable_table_size_correctness_check) {
+        return;
+    }
+    const std::vector<RowsetMetaSharedPtr>& all_rs_metas = _tablet_meta->all_rs_metas();
+    for (const auto& rs_meta : all_rs_metas) {
+        // Sizes as reported by the file system, to be compared with the meta.
+        int64_t total_segment_size = get_segment_file_size(rs_meta);
+        int64_t total_inverted_index_size = get_inverted_index_file_szie(rs_meta);
+        if (rs_meta->data_disk_size() != total_segment_size ||
+            rs_meta->index_disk_size() != total_inverted_index_size ||
+            rs_meta->data_disk_size() + rs_meta->index_disk_size() != rs_meta->total_disk_size()) {
+            // NOTE(review): the logged path comes from a default-constructed
+            // StorageResource for segment 0, so it presumably serves only as a
+            // hint for locating the rowset -- confirm it is meaningful for
+            // local rowsets.
+            LOG(WARNING) << "[Local table size check failed]:"
+                         << " tablet id: " << rs_meta->tablet_id()
+                         << ", rowset id:" << rs_meta->rowset_id()
+                         << ", rowset data disk size:" << rs_meta->data_disk_size()
+                         << ", rowset real data disk size:" << total_segment_size
+                         << ", rowset index disk size:" << rs_meta->index_disk_size()
+                         << ", rowset real index disk size:" << total_inverted_index_size
+                         << ", rowset total disk size:" << rs_meta->total_disk_size()
+                         << ", rowset segment path:"
+                         << StorageResource().remote_segment_path(
+                                    rs_meta->tablet_id(), rs_meta->rowset_id().to_string(), 0);
+            DCHECK(false);
+        }
+    }
+}
+
+// Builds the path of segment `seg_id` of the rowset described by `rs_meta`.
+// Local rowsets resolve under this tablet's directory; all others resolve
+// through the rowset's remote storage resource.
+std::string Tablet::get_segment_path(const RowsetMetaSharedPtr& rs_meta, int64_t seg_id) {
+    const std::string rowset_id_str = rs_meta->rowset_id().to_string();
+    if (rs_meta->is_local()) {
+        return local_segment_path(_tablet_path, rowset_id_str, seg_id);
+    }
+    return rs_meta->remote_storage_resource().value()->remote_segment_path(
+            rs_meta->tablet_id(), rowset_id_str, seg_id);
+}
+
+// Sums the file sizes of all segments of the rowset described by `rs_meta`,
+// as reported by the rowset's file system.
+//
+// A segment whose size cannot be obtained is logged and counted as 0 bytes.
+// Returns 0 when the rowset has no file system to query.
+int64_t Tablet::get_segment_file_size(const RowsetMetaSharedPtr& rs_meta) {
+    const auto& fs = rs_meta->fs();
+    if (!fs) {
+        // Nothing can be measured without a file system; return early rather
+        // than dereferencing the null pointer in the loop below.
+        LOG(WARNING) << "get fs failed, resource_id=" << rs_meta->resource_id();
+        return 0;
+    }
+    int64_t total_segment_size = 0;
+    for (int64_t seg_id = 0; seg_id < rs_meta->num_segments(); seg_id++) {
+        std::string segment_path = get_segment_path(rs_meta, seg_id);
+        int64_t segment_file_size = 0;
+        auto st = fs->file_size(segment_path, &segment_file_size);
+        if (!st.ok()) {
+            // Reset explicitly: the out-parameter is not trusted after a failure.
+            segment_file_size = 0;
+            LOG(WARNING) << "table size correctness check get segment size failed! msg:"
+                         << st.to_string() << ", segment path:" << segment_path;
+        }
+        total_segment_size += segment_file_size;
+    }
+    return total_segment_size;
+}
+
+// Sums the file sizes of all inverted index files of the rowset described by
+// `rs_meta`, as reported by the rowset's file system.
+//
+// With storage format V1 one file is looked up per (inverted index, segment)
+// pair; otherwise one file is looked up per segment. A file whose size cannot
+// be obtained is logged and counted as 0 bytes.
+// Returns 0 when the rowset has no file system to query.
+int64_t Tablet::get_inverted_index_file_szie(const RowsetMetaSharedPtr& rs_meta) {
+    const auto& fs = rs_meta->fs();
+    if (!fs) {
+        // Nothing can be measured without a file system; return early rather
+        // than dereferencing the null pointer in the loops below.
+        LOG(WARNING) << "get fs failed, resource_id=" << rs_meta->resource_id();
+        return 0;
+    }
+    int64_t total_inverted_index_size = 0;
+
+    if (rs_meta->tablet_schema()->get_inverted_index_storage_format() ==
+        InvertedIndexStorageFormatPB::V1) {
+        auto indices = rs_meta->tablet_schema()->indexes();
+        for (auto& index : indices) {
+            // only get file_size for inverted index
+            if (index.index_type() != IndexType::INVERTED) {
+                continue;
+            }
+            for (int seg_id = 0; seg_id < rs_meta->num_segments(); ++seg_id) {
+                std::string segment_path = get_segment_path(rs_meta, seg_id);
+                int64_t file_size = 0;
+
+                std::string inverted_index_file_path =
+                        InvertedIndexDescriptor::get_index_file_path_v1(
+                                InvertedIndexDescriptor::get_index_file_path_prefix(segment_path),
+                                index.index_id(), index.get_index_suffix());
+                auto st = fs->file_size(inverted_index_file_path, &file_size);
+                if (!st.ok()) {
+                    // Reset explicitly: the out-parameter is not trusted after a failure.
+                    file_size = 0;
+                    LOG(WARNING) << " tablet id: " << get_tablet_info().tablet_id
+                                 << ", rowset id:" << rs_meta->rowset_id()
+                                 << ", table size correctness check get inverted index v1 "
+                                    "size failed! msg:"
+                                 << st.to_string()
+                                 << ", inverted index path:" << inverted_index_file_path;
+                }
+                total_inverted_index_size += file_size;
+            }
+        }
+    } else {
+        for (int seg_id = 0; seg_id < rs_meta->num_segments(); ++seg_id) {
+            int64_t file_size = 0;
+            std::string segment_path = get_segment_path(rs_meta, seg_id);
+            std::string inverted_index_file_path = InvertedIndexDescriptor::get_index_file_path_v2(
+                    InvertedIndexDescriptor::get_index_file_path_prefix(segment_path));
+            auto st = fs->file_size(inverted_index_file_path, &file_size);
+            if (!st.ok()) {
+                // Reset explicitly: the out-parameter is not trusted after a failure.
+                file_size = 0;
+                LOG(WARNING) << " tablet id: " << get_tablet_info().tablet_id
+                             << ", rowset id:" << rs_meta->rowset_id()
+                             << ", table size correctness check get inverted index v2 "
+                                "size failed! msg:"
+                             << st.to_string()
+                             << ", inverted index path:" << inverted_index_file_path;
+            }
+            total_inverted_index_size += file_size;
+        }
+    }
+    return total_inverted_index_size;
+}
+
 } // namespace doris
diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index 2d7975b0fc1..e181af3d4d3 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -534,6 +534,10 @@ private:
     
////////////////////////////////////////////////////////////////////////////
 
     void _clear_cache_by_rowset(const BetaRowsetSharedPtr& rowset);
+    void check_table_size_correctness();
+    std::string get_segment_path(const RowsetMetaSharedPtr& rs_meta, int64_t 
seg_id);
+    int64_t get_segment_file_size(const RowsetMetaSharedPtr& rs_meta);
+    int64_t get_inverted_index_file_szie(const RowsetMetaSharedPtr& rs_meta);
 
 public:
     static const int64_t K_INVALID_CUMULATIVE_POINT = -1;
diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h
index 394aeb17b85..d56e529e42b 100644
--- a/be/src/olap/tablet_meta.h
+++ b/be/src/olap/tablet_meta.h
@@ -642,7 +642,7 @@ inline size_t TabletMeta::num_rows() const {
 inline size_t TabletMeta::tablet_footprint() const {
     size_t total_size = 0;
     for (auto& rs : _rs_metas) {
-        total_size += rs->data_disk_size();
+        total_size += rs->total_disk_size();
     }
     return total_size;
 }
@@ -651,7 +651,7 @@ inline size_t TabletMeta::tablet_local_size() const {
     size_t total_size = 0;
     for (auto& rs : _rs_metas) {
         if (rs->is_local()) {
-            total_size += rs->data_disk_size();
+            total_size += rs->total_disk_size();
         }
     }
     return total_size;
@@ -661,7 +661,7 @@ inline size_t TabletMeta::tablet_remote_size() const {
     size_t total_size = 0;
     for (auto& rs : _rs_metas) {
         if (!rs->is_local()) {
-            total_size += rs->data_disk_size();
+            total_size += rs->total_disk_size();
         }
     }
     return total_size;
diff --git a/be/src/olap/task/engine_checksum_task.cpp 
b/be/src/olap/task/engine_checksum_task.cpp
index d0c4b0e45f4..05ecfc0401b 100644
--- a/be/src/olap/task/engine_checksum_task.cpp
+++ b/be/src/olap/task/engine_checksum_task.cpp
@@ -93,7 +93,7 @@ Status EngineChecksumTask::_compute_checksum() {
     }
     size_t input_size = 0;
     for (const auto& rowset : input_rowsets) {
-        input_size += rowset->data_disk_size();
+        input_size += rowset->total_disk_size();
     }
 
     auto res = reader.init(reader_params);
diff --git a/be/src/olap/task/index_builder.cpp 
b/be/src/olap/task/index_builder.cpp
index 2ecadfa53b0..09cbdeadb3f 100644
--- a/be/src/olap/task/index_builder.cpp
+++ b/be/src/olap/task/index_builder.cpp
@@ -207,13 +207,12 @@ Status IndexBuilder::update_inverted_index_info() {
             InvertedIndexStorageFormatPB::V1) {
             if (_is_drop_op) {
                 VLOG_DEBUG << "data_disk_size:" << 
input_rowset_meta->data_disk_size()
-                           << " total_disk_size:" << 
input_rowset_meta->data_disk_size()
+                           << " total_disk_size:" << 
input_rowset_meta->total_disk_size()
                            << " index_disk_size:" << 
input_rowset_meta->index_disk_size()
                            << " drop_index_size:" << drop_index_size;
                 
rowset_meta->set_total_disk_size(input_rowset_meta->total_disk_size() -
                                                  drop_index_size);
-                
rowset_meta->set_data_disk_size(input_rowset_meta->data_disk_size() -
-                                                drop_index_size);
+                
rowset_meta->set_data_disk_size(input_rowset_meta->data_disk_size());
                 
rowset_meta->set_index_disk_size(input_rowset_meta->index_disk_size() -
                                                  drop_index_size);
             } else {
@@ -238,7 +237,7 @@ Status IndexBuilder::update_inverted_index_info() {
             }
             
rowset_meta->set_total_disk_size(input_rowset_meta->total_disk_size() -
                                              total_index_size);
-            
rowset_meta->set_data_disk_size(input_rowset_meta->data_disk_size() - 
total_index_size);
+            
rowset_meta->set_data_disk_size(input_rowset_meta->data_disk_size());
             
rowset_meta->set_index_disk_size(input_rowset_meta->index_disk_size() -
                                              total_index_size);
         }
@@ -323,8 +322,7 @@ Status 
IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta
                 inverted_index_size += 
inverted_index_writer->get_index_file_total_size();
             }
             _inverted_index_file_writers.clear();
-            
output_rowset_meta->set_data_disk_size(output_rowset_meta->data_disk_size() +
-                                                   inverted_index_size);
+            
output_rowset_meta->set_data_disk_size(output_rowset_meta->data_disk_size());
             
output_rowset_meta->set_total_disk_size(output_rowset_meta->total_disk_size() +
                                                     inverted_index_size);
             
output_rowset_meta->set_index_disk_size(output_rowset_meta->index_disk_size() +
@@ -489,8 +487,7 @@ Status 
IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta
         }
         _inverted_index_builders.clear();
         _inverted_index_file_writers.clear();
-        
output_rowset_meta->set_data_disk_size(output_rowset_meta->data_disk_size() +
-                                               inverted_index_size);
+        
output_rowset_meta->set_data_disk_size(output_rowset_meta->data_disk_size());
         
output_rowset_meta->set_total_disk_size(output_rowset_meta->total_disk_size() +
                                                 inverted_index_size);
         
output_rowset_meta->set_index_disk_size(output_rowset_meta->index_disk_size() +
diff --git a/be/src/runtime/load_stream_writer.cpp 
b/be/src/runtime/load_stream_writer.cpp
index 37243fab14b..2e987edc7bd 100644
--- a/be/src/runtime/load_stream_writer.cpp
+++ b/be/src/runtime/load_stream_writer.cpp
@@ -201,7 +201,7 @@ Status LoadStreamWriter::add_segment(uint32_t segid, const 
SegmentStatistics& st
     }
 
     DBUG_EXECUTE_IF("LoadStreamWriter.add_segment.size_not_match", { 
segment_file_size++; });
-    if (segment_file_size + inverted_file_size != stat.data_size) {
+    if (segment_file_size != stat.data_size) {
         return Status::Corruption(
                 "add_segment failed, segment stat {} does not match, file 
size={}, inverted file "
                 "size={}, stat.data_size={}, tablet id={}",
diff --git a/cloud/src/common/config.h b/cloud/src/common/config.h
index 7caba826520..daeb5ddfee5 100644
--- a/cloud/src/common/config.h
+++ b/cloud/src/common/config.h
@@ -217,5 +217,4 @@ CONF_Int32(max_tablet_index_num_per_batch, "1000");
 
 // Max aborted txn num for the same label name
 CONF_mInt64(max_num_aborted_txn, "100");
-
 } // namespace doris::cloud::config
diff --git a/cloud/src/meta-service/meta_service_txn.cpp 
b/cloud/src/meta-service/meta_service_txn.cpp
index cc333c42846..32f6b56f51a 100644
--- a/cloud/src/meta-service/meta_service_txn.cpp
+++ b/cloud/src/meta-service/meta_service_txn.cpp
@@ -1167,7 +1167,7 @@ void commit_txn_immediately(
 
             // Accumulate affected rows
             auto& stats = tablet_stats[tablet_id];
-            stats.data_size += i.data_disk_size();
+            stats.data_size += i.total_disk_size();
             stats.num_rows += i.num_rows();
             ++stats.num_rowsets;
             stats.num_segs += i.num_segments();
diff --git a/cloud/test/meta_service_http_test.cpp 
b/cloud/test/meta_service_http_test.cpp
index 20dee957126..e49628fcb3a 100644
--- a/cloud/test/meta_service_http_test.cpp
+++ b/cloud/test/meta_service_http_test.cpp
@@ -320,6 +320,8 @@ static doris::RowsetMetaCloudPB create_rowset(int64_t 
txn_id, int64_t tablet_id,
     rowset.set_num_segments(1);
     rowset.set_num_rows(num_rows);
     rowset.set_data_disk_size(num_rows * 100);
+    rowset.set_index_disk_size(num_rows * 10);
+    rowset.set_total_disk_size(num_rows * 110);
     rowset.mutable_tablet_schema()->set_schema_version(0);
     rowset.set_txn_expiration(::time(nullptr)); // Required by DCHECK
     return rowset;
@@ -1285,7 +1287,7 @@ TEST(MetaServiceHttpTest, GetTabletStatsTest) {
     stats_tablet_data_size_key({mock_instance, table_id, index_id, 
partition_id, tablet_id},
                                &data_size_key);
     ASSERT_EQ(txn->get(data_size_key, &data_size_val), TxnErrorCode::TXN_OK);
-    EXPECT_EQ(*(int64_t*)data_size_val.data(), 20000);
+    EXPECT_EQ(*(int64_t*)data_size_val.data(), 22000);
     std::string num_rows_key, num_rows_val;
     stats_tablet_num_rows_key({mock_instance, table_id, index_id, 
partition_id, tablet_id},
                               &num_rows_key);
@@ -1306,7 +1308,7 @@ TEST(MetaServiceHttpTest, GetTabletStatsTest) {
     get_tablet_stats(meta_service.get(), table_id, index_id, partition_id, 
tablet_id, res);
     ASSERT_EQ(res.status().code(), MetaServiceCode::OK);
     ASSERT_EQ(res.tablet_stats_size(), 1);
-    EXPECT_EQ(res.tablet_stats(0).data_size(), 40000);
+    EXPECT_EQ(res.tablet_stats(0).data_size(), 44000);
     EXPECT_EQ(res.tablet_stats(0).num_rows(), 400);
     EXPECT_EQ(res.tablet_stats(0).num_rowsets(), 5);
     EXPECT_EQ(res.tablet_stats(0).num_segments(), 4);
diff --git a/cloud/test/meta_service_test.cpp b/cloud/test/meta_service_test.cpp
index 3baec482710..ee90e604e1c 100644
--- a/cloud/test/meta_service_test.cpp
+++ b/cloud/test/meta_service_test.cpp
@@ -178,6 +178,8 @@ static doris::RowsetMetaCloudPB create_rowset(int64_t 
txn_id, int64_t tablet_id,
     rowset.set_num_segments(1);
     rowset.set_num_rows(num_rows);
     rowset.set_data_disk_size(num_rows * 100);
+    rowset.set_index_disk_size(num_rows * 10);
+    rowset.set_total_disk_size(num_rows * 110);
     rowset.mutable_tablet_schema()->set_schema_version(0);
     rowset.set_txn_expiration(::time(nullptr)); // Required by DCHECK
     return rowset;
@@ -4429,7 +4431,7 @@ TEST(MetaServiceTest, GetTabletStatsTest) {
     stats_tablet_data_size_key({mock_instance, table_id, index_id, 
partition_id, tablet_id},
                                &data_size_key);
     ASSERT_EQ(txn->get(data_size_key, &data_size_val), TxnErrorCode::TXN_OK);
-    EXPECT_EQ(*(int64_t*)data_size_val.data(), 20000);
+    EXPECT_EQ(*(int64_t*)data_size_val.data(), 22000);
     std::string num_rows_key, num_rows_val;
     stats_tablet_num_rows_key({mock_instance, table_id, index_id, 
partition_id, tablet_id},
                               &num_rows_key);
@@ -4450,7 +4452,7 @@ TEST(MetaServiceTest, GetTabletStatsTest) {
     get_tablet_stats(meta_service.get(), table_id, index_id, partition_id, 
tablet_id, res);
     ASSERT_EQ(res.status().code(), MetaServiceCode::OK);
     ASSERT_EQ(res.tablet_stats_size(), 1);
-    EXPECT_EQ(res.tablet_stats(0).data_size(), 40000);
+    EXPECT_EQ(res.tablet_stats(0).data_size(), 44000);
     EXPECT_EQ(res.tablet_stats(0).num_rows(), 400);
     EXPECT_EQ(res.tablet_stats(0).num_rowsets(), 5);
     EXPECT_EQ(res.tablet_stats(0).num_segments(), 4);
diff --git a/cloud/test/schema_kv_test.cpp b/cloud/test/schema_kv_test.cpp
index 69ee9aba442..07f658175c8 100644
--- a/cloud/test/schema_kv_test.cpp
+++ b/cloud/test/schema_kv_test.cpp
@@ -293,6 +293,8 @@ static doris::RowsetMetaCloudPB create_rowset(int64_t txn_id, int64_t tablet_id,
     rowset.set_num_rows(100);
     rowset.set_num_segments(1);
     rowset.set_data_disk_size(10000);
+    rowset.set_index_disk_size(1000);
+    rowset.set_total_disk_size(11000);
     if (version > 0) {
         rowset.set_start_version(version);
         rowset.set_end_version(version);
@@ -478,7 +480,7 @@ TEST(DetachSchemaKVTest, RowsetTest) {
         EXPECT_EQ(get_rowset_res.stats().num_rows(), 100);
         EXPECT_EQ(get_rowset_res.stats().num_rowsets(), 2);
         EXPECT_EQ(get_rowset_res.stats().num_segments(), 1);
-        EXPECT_EQ(get_rowset_res.stats().data_size(), 10000);
+        EXPECT_EQ(get_rowset_res.stats().data_size(), 11000);
     }
 
     // new MS read rowsets committed by both old and new MS
@@ -527,7 +529,7 @@ TEST(DetachSchemaKVTest, RowsetTest) {
         EXPECT_EQ(get_rowset_res->stats().num_rows(), 2500);
         EXPECT_EQ(get_rowset_res->stats().num_rowsets(), 26);
         EXPECT_EQ(get_rowset_res->stats().num_segments(), 25);
-        EXPECT_EQ(get_rowset_res->stats().data_size(), 250000);
+        EXPECT_EQ(get_rowset_res->stats().data_size(), 275000);
         if (schema != nullptr) {
             auto schema_version = get_rowset_res->rowset_meta(10).schema_version();
             get_rowset_res->mutable_rowset_meta(10)->mutable_tablet_schema()->set_schema_version(3);
diff --git a/regression-test/pipeline/cloud_p0/conf/be_custom.conf b/regression-test/pipeline/cloud_p0/conf/be_custom.conf
index 377a02536c6..5fbf89fd75c 100644
--- a/regression-test/pipeline/cloud_p0/conf/be_custom.conf
+++ b/regression-test/pipeline/cloud_p0/conf/be_custom.conf
@@ -37,3 +37,4 @@ enable_new_tablet_do_compaction = true
 arrow_flight_sql_port = 8181
 pipeline_task_leakage_detect_period_sec=1
 crash_in_memory_tracker_inaccurate = true
+enable_table_size_correctness_check=true
diff --git a/regression-test/pipeline/cloud_p1/conf/be_custom.conf b/regression-test/pipeline/cloud_p1/conf/be_custom.conf
index 4310441a0ed..0d3ae0c526d 100644
--- a/regression-test/pipeline/cloud_p1/conf/be_custom.conf
+++ b/regression-test/pipeline/cloud_p1/conf/be_custom.conf
@@ -33,3 +33,4 @@ arrow_flight_sql_port = 8181
 pipeline_task_leakage_detect_period_sec=1
 crash_in_memory_tracker_inaccurate = true
 enable_new_tablet_do_compaction = true
+enable_table_size_correctness_check=true
diff --git a/regression-test/pipeline/p0/conf/be.conf b/regression-test/pipeline/p0/conf/be.conf
index 745515aed06..760f813ffeb 100644
--- a/regression-test/pipeline/p0/conf/be.conf
+++ b/regression-test/pipeline/p0/conf/be.conf
@@ -71,3 +71,4 @@ be_proc_monitor_interval_ms = 30000
 webserver_num_workers = 128
 pipeline_task_leakage_detect_period_sec=1
 crash_in_memory_tracker_inaccurate = true
+enable_table_size_correctness_check=true
diff --git a/regression-test/pipeline/p1/conf/be.conf b/regression-test/pipeline/p1/conf/be.conf
index fbb36c340f0..1512bce7ac2 100644
--- a/regression-test/pipeline/p1/conf/be.conf
+++ b/regression-test/pipeline/p1/conf/be.conf
@@ -63,3 +63,4 @@ enable_missing_rows_correctness_check=true
 enable_jvm_monitor = true
 pipeline_task_leakage_detect_period_sec=1
 crash_in_memory_tracker_inaccurate = true
+enable_table_size_correctness_check=true


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to