csun5285 commented on code in PR #41022:
URL: https://github.com/apache/doris/pull/41022#discussion_r1824258896


##########
be/src/cloud/cloud_meta_mgr.cpp:
##########
@@ -1125,4 +1128,124 @@ Status CloudMetaMgr::remove_old_version_delete_bitmap(
     return st;
 }
 
+void CloudMetaMgr::check_table_size_correctness(const RowsetMeta& rs_meta) {
+    if (!config::enable_table_size_correctness_check) {
+        return;
+    }
+    int64_t total_segment_size = get_segment_file_size(rs_meta);
+    int64_t total_inverted_index_size = get_inverted_index_file_szie(rs_meta);
+    if (rs_meta.data_disk_size() != total_segment_size ||
+        rs_meta.index_disk_size() != total_inverted_index_size ||
+        rs_meta.data_disk_size() + rs_meta.index_disk_size() != rs_meta.total_disk_size()) {
+        LOG(WARNING) << "[Cloud table table size check failed]:"
+                     << " tablet id: " << rs_meta.tablet_id()
+                     << ", rowset id:" << rs_meta.rowset_id()
+                     << ", rowset data disk size:" << rs_meta.data_disk_size()
+                     << ", rowset real data disk size:" << total_segment_size
+                     << ", rowset index disk size:" << rs_meta.index_disk_size()
+                     << ", rowset real index disk size:" << total_inverted_index_size
+                     << ", rowset total disk size:" << rs_meta.total_disk_size()
+                     << ", rowset segment path:"
+                     << StorageResource().remote_segment_path(rs_meta.tablet_id(),
+                                                              rs_meta.rowset_id().to_string(), 0);
+        DCHECK(false);
+    }
+}
+
+int64_t CloudMetaMgr::get_segment_file_size(const RowsetMeta& rs_meta) {
+    int64_t total_segment_size = 0;
+    const auto fs = const_cast<RowsetMeta&>(rs_meta).fs();
+    if (!fs) {
+        LOG(WARNING) << "get fs failed, resource_id={}" << rs_meta.resource_id();
+    }
+    for (int64_t seg_id = 0; seg_id < rs_meta.num_segments(); seg_id++) {
+        std::string segment_path = StorageResource().remote_segment_path(
+                rs_meta.tablet_id(), rs_meta.rowset_id().to_string(), seg_id);
+        int64_t segment_file_size = 0;
+        auto st = fs->file_size(segment_path, &segment_file_size);
+        if (!st.ok()) {
+            segment_file_size = 0;
+            if (st.is<FILE_NOT_EXIST>()) {
+                LOG(INFO) << "cloud table size correctness check get segment size 0 because "
+                             "file not exist! msg:"
+                          << st.msg() << ", segment path:" << segment_path;
+            } else {
+                LOG(WARNING) << "cloud table size correctness check get segment size failed! msg:"
+                             << st.msg() << ", segment path:" << segment_path;
+            }
+        }
+        total_segment_size += segment_file_size;
+    }
+    return total_segment_size;
+}
+
+int64_t CloudMetaMgr::get_inverted_index_file_szie(const RowsetMeta& rs_meta) {
+    int64_t total_inverted_index_size = 0;
+    const auto fs = const_cast<RowsetMeta&>(rs_meta).fs();
+    if (!fs) {
+        LOG(WARNING) << "get fs failed, resource_id={}" << rs_meta.resource_id();
+    }
+    if (rs_meta.tablet_schema()->get_inverted_index_storage_format() ==
+        InvertedIndexStorageFormatPB::V1) {
+        auto indices = rs_meta.tablet_schema()->indexes();
+        for (auto& index : indices) {
+            // only get file_size for inverted index
+            if (index.index_type() != IndexType::INVERTED) {
+                continue;
+            }
+            for (int seg_id = 0; seg_id < rs_meta.num_segments(); ++seg_id) {
+                std::string segment_path = StorageResource().remote_segment_path(
+                        rs_meta.tablet_id(), rs_meta.rowset_id().to_string(), seg_id);
+                int64_t file_size = 0;
+
+                std::string inverted_index_file_path =
+                        InvertedIndexDescriptor::get_index_file_path_v1(
+                                InvertedIndexDescriptor::get_index_file_path_prefix(segment_path),
+                                index.index_id(), index.get_index_suffix());
+                auto st = fs->file_size(inverted_index_file_path, &file_size);
+                if (!st.ok()) {
+                    file_size = 0;
+                    if (st.is<FILE_NOT_EXIST>()) {
+                        LOG(INFO) << "cloud table size correctness check get inverted index v1 "
+                                     "0 because file not exist! msg:"
+                                  << st.msg()
+                                  << ", inverted index path:" << inverted_index_file_path;
+                    } else {
+                        LOG(WARNING)
+                                << "cloud table size correctness check get inverted index v1 "
+                                   "size failed! msg:"
+                                << st.msg() << ", inverted index path:" << inverted_index_file_path;
+                    }
+                }
+                total_inverted_index_size += file_size;
+            }
+        }
+    } else {

Review Comment:
   if (rs_meta.tablet_schema()->has_inverted_index())



##########
be/src/runtime/load_stream_writer.cpp:
##########
@@ -201,7 +201,7 @@ Status LoadStreamWriter::add_segment(uint32_t segid, const SegmentStatistics& st
     }
 
     DBUG_EXECUTE_IF("LoadStreamWriter.add_segment.size_not_match", { segment_file_size++; });
-    if (segment_file_size + inverted_file_size != stat.data_size) {
+    if (segment_file_size != stat.data_size) {

Review Comment:
   segment_file_size != stat.data_size || inverted_file_size != stat.index_size



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to