gavinchou commented on code in PR #41022:
URL: https://github.com/apache/doris/pull/41022#discussion_r1803047251
##########
be/src/cloud/cloud_meta_mgr.cpp:
##########
@@ -750,6 +751,130 @@ Status CloudMetaMgr::commit_rowset(const RowsetMeta&
rs_meta,
Status ret_st;
TEST_INJECTION_POINT_RETURN_WITH_VALUE("CloudMetaMgr::commit_rowset",
ret_st);
}
+ if (config::enable_table_size_correctness_check) {
+ const auto fs = const_cast<RowsetMeta&>(rs_meta).fs();
+ if (!fs) {
+ LOG(WARNING) << "get fs failed, resource_id={}" <<
rs_meta.resource_id();
+ }
+ int64_t total_segment_size = 0;
+ int64_t total_inverted_index_size = 0;
+ for (int64_t seg_id = 0; seg_id < rs_meta.num_segments(); seg_id++) {
+ std::string segment_path = StorageResource().remote_segment_path(
+ rs_meta.tablet_id(), rs_meta.rowset_id().to_string(),
seg_id);
+ int64_t segment_file_size = 0;
+ auto st = fs->file_size(segment_path, &segment_file_size);
+ if (!st.ok()) {
+ segment_file_size = 0;
+ LOG(WARNING) << "table size correctness check get segment size
failed! msg:"
+ << st.msg() << ", segment path:" << segment_path;
+ }
+ total_segment_size += segment_file_size;
+ }
+
+ if (rs_meta.tablet_schema()->get_inverted_index_storage_format() ==
+ InvertedIndexStorageFormatPB::V1) {
+ auto indices = rs_meta.tablet_schema()->indexes();
+ for (auto& index : indices) {
+ // only get file_size for inverted index
+ if (index.index_type() != IndexType::INVERTED) {
+ continue;
+ }
+ for (int seg_id = 0; seg_id < rs_meta.num_segments();
++seg_id) {
+ std::string segment_path =
StorageResource().remote_segment_path(
+ rs_meta.tablet_id(),
rs_meta.rowset_id().to_string(), seg_id);
+ int64_t file_size = 0;
+
+ std::string inverted_index_file_path =
+ InvertedIndexDescriptor::get_index_file_path_v1(
+
InvertedIndexDescriptor::get_index_file_path_prefix(
+ segment_path),
+ index.index_id(),
index.get_index_suffix());
+ auto st = fs->file_size(inverted_index_file_path,
&file_size);
+ if (!st.ok()) {
Review Comment:
Do not continue if it is truly not ok.
##########
be/src/olap/tablet.cpp:
##########
@@ -330,6 +332,136 @@ Status Tablet::init() {
// should save tablet meta to remote meta store
// if it's a primary replica
void Tablet::save_meta() {
+ if (config::enable_table_size_correctness_check) {
Review Comment:
Make it a separate function.
##########
be/src/cloud/cloud_meta_mgr.cpp:
##########
@@ -750,6 +751,130 @@ Status CloudMetaMgr::commit_rowset(const RowsetMeta&
rs_meta,
Status ret_st;
TEST_INJECTION_POINT_RETURN_WITH_VALUE("CloudMetaMgr::commit_rowset",
ret_st);
}
+ if (config::enable_table_size_correctness_check) {
+ const auto fs = const_cast<RowsetMeta&>(rs_meta).fs();
+ if (!fs) {
+ LOG(WARNING) << "get fs failed, resource_id={}" <<
rs_meta.resource_id();
+ }
+ int64_t total_segment_size = 0;
+ int64_t total_inverted_index_size = 0;
+ for (int64_t seg_id = 0; seg_id < rs_meta.num_segments(); seg_id++) {
+ std::string segment_path = StorageResource().remote_segment_path(
+ rs_meta.tablet_id(), rs_meta.rowset_id().to_string(),
seg_id);
+ int64_t segment_file_size = 0;
+ auto st = fs->file_size(segment_path, &segment_file_size);
+ if (!st.ok()) {
+ segment_file_size = 0;
+ LOG(WARNING) << "table size correctness check get segment size
failed! msg:"
+ << st.msg() << ", segment path:" << segment_path;
+ }
+ total_segment_size += segment_file_size;
+ }
+
+ if (rs_meta.tablet_schema()->get_inverted_index_storage_format() ==
+ InvertedIndexStorageFormatPB::V1) {
+ auto indices = rs_meta.tablet_schema()->indexes();
+ for (auto& index : indices) {
+ // only get file_size for inverted index
+ if (index.index_type() != IndexType::INVERTED) {
+ continue;
+ }
+ for (int seg_id = 0; seg_id < rs_meta.num_segments();
++seg_id) {
+ std::string segment_path =
StorageResource().remote_segment_path(
+ rs_meta.tablet_id(),
rs_meta.rowset_id().to_string(), seg_id);
+ int64_t file_size = 0;
+
+ std::string inverted_index_file_path =
+ InvertedIndexDescriptor::get_index_file_path_v1(
+
InvertedIndexDescriptor::get_index_file_path_prefix(
+ segment_path),
+ index.index_id(),
index.get_index_suffix());
+ auto st = fs->file_size(inverted_index_file_path,
&file_size);
+ if (!st.ok()) {
+ file_size = 0;
+ LOG(WARNING)
+ << "table size correctness check get inverted
index v1 "
+ "size failed! msg:"
+ << st.msg() << ", inverted index path:" <<
inverted_index_file_path;
+ }
+ total_inverted_index_size += file_size;
+ }
+ }
+ } else {
+ for (int seg_id = 0; seg_id < rs_meta.num_segments(); ++seg_id) {
+ int64_t file_size = 0;
+ std::string segment_path =
StorageResource().remote_segment_path(
+ rs_meta.tablet_id(), rs_meta.rowset_id().to_string(),
seg_id);
+
+ std::string inverted_index_file_path =
+ InvertedIndexDescriptor::get_index_file_path_v2(
+
InvertedIndexDescriptor::get_index_file_path_prefix(segment_path));
+ auto st = fs->file_size(inverted_index_file_path, &file_size);
+ if (!st.ok()) {
+ file_size = 0;
+ LOG(WARNING) << "table size correctness check get inverted
index v2 size "
+ "failed! msg:"
+ << st.msg()
+ << ", inverted index path:" <<
inverted_index_file_path;
+ }
+ total_inverted_index_size += file_size;
+ }
+ }
+ LOG(INFO) << "[Cloud table segment size check info]:"
+ << " tablet id: " << rs_meta.tablet_id() << ", rowset id:"
<< rs_meta.rowset_id()
+ << ", rowset data disk size:" << rs_meta.data_disk_size()
+ << ", rowset real data disk size:" << total_segment_size
+ << ", rowset index disk size:" << rs_meta.index_disk_size()
+ << ", rowset real index disk size:" <<
total_inverted_index_size
+ << ", rowset total disk size:" << rs_meta.total_disk_size()
+ << ", rowset segment path:"
+ << StorageResource().remote_segment_path(rs_meta.tablet_id(),
+
rs_meta.rowset_id().to_string(), 0)
+ << ".";
+ if (rs_meta.data_disk_size() != total_segment_size) {
Review Comment:
Keep only one `if`:
```
if (rs_meta.data_disk_size() != total_segment_size || ...)
```
##########
be/src/cloud/cloud_meta_mgr.cpp:
##########
@@ -750,6 +751,130 @@ Status CloudMetaMgr::commit_rowset(const RowsetMeta&
rs_meta,
Status ret_st;
TEST_INJECTION_POINT_RETURN_WITH_VALUE("CloudMetaMgr::commit_rowset",
ret_st);
}
+ if (config::enable_table_size_correctness_check) {
Review Comment:
make it a separate function
##########
be/src/olap/tablet.cpp:
##########
@@ -330,6 +332,136 @@ Status Tablet::init() {
// should save tablet meta to remote meta store
// if it's a primary replica
void Tablet::save_meta() {
+ if (config::enable_table_size_correctness_check) {
+ const std::vector<RowsetMetaSharedPtr>& all_rs_metas =
_tablet_meta->all_rs_metas();
+ for (const auto& rs_meta : all_rs_metas) {
+ const auto& fs = rs_meta->fs();
+ if (!fs) {
+ LOG(WARNING) << "get fs failed, resource_id={}" <<
rs_meta->resource_id();
+ }
+ int64_t total_segment_size = 0;
+ int64_t total_inverted_index_size = 0;
+ for (int64_t seg_id = 0; seg_id < rs_meta->num_segments();
seg_id++) {
+ std::string segment_path = fmt::format("{}/{}_{}.dat",
_tablet_path,
Review Comment:
Why not call `std::string local_segment_path()`?
##########
be/src/cloud/cloud_meta_mgr.cpp:
##########
@@ -750,6 +751,130 @@ Status CloudMetaMgr::commit_rowset(const RowsetMeta&
rs_meta,
Status ret_st;
TEST_INJECTION_POINT_RETURN_WITH_VALUE("CloudMetaMgr::commit_rowset",
ret_st);
}
+ if (config::enable_table_size_correctness_check) {
Review Comment:
Consider moving this to rowset builder build.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]