This is an automated email from the ASF dual-hosted git repository. gavinchou pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new d1b448fd71b [fix](rowset meta) Fix rowset meta size relation (#41022) d1b448fd71b is described below commit d1b448fd71b3feaa539dee973f8b230cef2d2b20 Author: abmdocrt <yukang.lian2...@gmail.com> AuthorDate: Thu Oct 31 19:41:05 2024 +0800 [fix](rowset meta) Fix rowset meta size relation (#41022) Issue #40879 step 2 make rowset total size = rowset data size + rowset index size --- be/src/cloud/cloud_base_compaction.cpp | 23 ++-- be/src/cloud/cloud_cumulative_compaction.cpp | 25 ++-- .../cloud/cloud_cumulative_compaction_policy.cpp | 2 +- be/src/cloud/cloud_full_compaction.cpp | 26 +++-- be/src/cloud/cloud_meta_mgr.cpp | 123 ++++++++++++++++++++ be/src/cloud/cloud_meta_mgr.h | 3 + be/src/cloud/cloud_rowset_builder.cpp | 2 +- be/src/cloud/cloud_schema_change_job.cpp | 2 +- be/src/cloud/cloud_tablet.cpp | 2 +- be/src/common/config.cpp | 2 + be/src/common/config.h | 2 + be/src/olap/base_compaction.cpp | 2 +- be/src/olap/compaction.cpp | 29 +++-- be/src/olap/compaction.h | 5 +- be/src/olap/cumulative_compaction.cpp | 3 +- be/src/olap/rowset/beta_rowset.cpp | 2 +- be/src/olap/rowset/beta_rowset_writer.cpp | 9 +- be/src/olap/rowset/beta_rowset_writer.h | 1 - be/src/olap/rowset/rowset.h | 3 +- be/src/olap/rowset/rowset_meta.cpp | 1 + be/src/olap/rowset/segment_creator.cpp | 32 ++++-- be/src/olap/rowset/vertical_beta_rowset_writer.cpp | 1 - be/src/olap/single_replica_compaction.cpp | 13 ++- be/src/olap/tablet.cpp | 126 ++++++++++++++++++++- be/src/olap/tablet.h | 4 + be/src/olap/tablet_meta.h | 6 +- be/src/olap/task/engine_checksum_task.cpp | 2 +- be/src/olap/task/index_builder.cpp | 13 +-- be/src/runtime/load_stream_writer.cpp | 2 +- cloud/src/common/config.h | 1 - cloud/src/meta-service/meta_service_txn.cpp | 2 +- cloud/test/meta_service_http_test.cpp | 6 +- cloud/test/meta_service_test.cpp | 6 +- cloud/test/schema_kv_test.cpp | 6 +- .../pipeline/cloud_p0/conf/be_custom.conf | 1 + .../pipeline/cloud_p1/conf/be_custom.conf | 1 + regression-test/pipeline/p0/conf/be.conf | 1 + regression-test/pipeline/p1/conf/be.conf | 1 + 38 files changed, 398 insertions(+), 93 deletions(-) diff --git a/be/src/cloud/cloud_base_compaction.cpp b/be/src/cloud/cloud_base_compaction.cpp index f431eaf850b..88d83000e95 100644 --- a/be/src/cloud/cloud_base_compaction.cpp +++ b/be/src/cloud/cloud_base_compaction.cpp @@ -124,7 +124,8 @@ Status CloudBaseCompaction::prepare_compact() { for (auto& rs : _input_rowsets) { _input_row_num += rs->num_rows(); _input_segments += rs->num_segments(); - _input_rowsets_size += rs->data_disk_size(); + _input_rowsets_data_size += rs->data_disk_size(); + _input_rowsets_total_size += rs->total_disk_size(); } LOG_INFO("start CloudBaseCompaction, tablet_id={}, range=[{}-{}]", _tablet->tablet_id(), _input_rowsets.front()->start_version(), _input_rowsets.back()->end_version()) @@ -132,7 +133,9 @@ Status CloudBaseCompaction::prepare_compact() { .tag("input_rowsets", _input_rowsets.size()) .tag("input_rows", _input_row_num) .tag("input_segments", _input_segments) - .tag("input_data_size", _input_rowsets_size); + .tag("input_rowsets_data_size", _input_rowsets_data_size) + .tag("input_rowsets_index_size", _input_rowsets_index_size) + .tag("input_rowsets_total_size", _input_rowsets_total_size); return st; } @@ -270,17 +273,21 @@ Status CloudBaseCompaction::execute_compact() { .tag("input_rowsets", _input_rowsets.size()) .tag("input_rows", _input_row_num) .tag("input_segments", _input_segments) - .tag("input_data_size", _input_rowsets_size) + .tag("input_rowsets_data_size", _input_rowsets_data_size) + .tag("input_rowsets_index_size", _input_rowsets_index_size) + .tag("input_rowsets_total", _input_rowsets_total_size) .tag("output_rows", _output_rowset->num_rows()) .tag("output_segments", _output_rowset->num_segments()) - .tag("output_data_size", _output_rowset->data_disk_size()); + .tag("output_rowset_data_size", _output_rowset->data_disk_size()) + .tag("output_rowset_index_size", _output_rowset->index_disk_size()) + .tag("output_rowset_total_size", _output_rowset->total_disk_size()); //_compaction_succeed = true; _state = CompactionState::SUCCESS; DorisMetrics::instance()->base_compaction_deltas_total->increment(_input_rowsets.size()); - DorisMetrics::instance()->base_compaction_bytes_total->increment(_input_rowsets_size); - base_output_size << _output_rowset->data_disk_size(); + DorisMetrics::instance()->base_compaction_bytes_total->increment(_input_rowsets_total_size); + base_output_size << _output_rowset->total_disk_size(); return Status::OK(); } @@ -302,8 +309,8 @@ Status CloudBaseCompaction::modify_rowsets() { compaction_job->set_output_cumulative_point(cloud_tablet()->cumulative_layer_point()); compaction_job->set_num_input_rows(_input_row_num); compaction_job->set_num_output_rows(_output_rowset->num_rows()); - compaction_job->set_size_input_rowsets(_input_rowsets_size); - compaction_job->set_size_output_rowsets(_output_rowset->data_disk_size()); + compaction_job->set_size_input_rowsets(_input_rowsets_total_size); + compaction_job->set_size_output_rowsets(_output_rowset->total_disk_size()); compaction_job->set_num_input_segments(_input_segments); compaction_job->set_num_output_segments(_output_rowset->num_segments()); compaction_job->set_num_input_rowsets(_input_rowsets.size()); diff --git a/be/src/cloud/cloud_cumulative_compaction.cpp b/be/src/cloud/cloud_cumulative_compaction.cpp index 7910d94534e..8eb92577693 100644 --- a/be/src/cloud/cloud_cumulative_compaction.cpp +++ b/be/src/cloud/cloud_cumulative_compaction.cpp @@ -164,7 +164,9 @@ PREPARE_TRY_AGAIN: for (auto& rs : _input_rowsets) { _input_row_num += rs->num_rows(); _input_segments += rs->num_segments(); - _input_rowsets_size += rs->data_disk_size(); + _input_rowsets_data_size += rs->data_disk_size(); + _input_rowsets_index_size += rs->index_disk_size(); + _input_rowsets_total_size += rs->total_disk_size(); } LOG_INFO("start CloudCumulativeCompaction, tablet_id={}, range=[{}-{}]", _tablet->tablet_id(), _input_rowsets.front()->start_version(), _input_rowsets.back()->end_version()) @@ -172,7 +174,9 @@ PREPARE_TRY_AGAIN: .tag("input_rowsets", _input_rowsets.size()) .tag("input_rows", _input_row_num) .tag("input_segments", _input_segments) - .tag("input_data_size", _input_rowsets_size) + .tag("input_rowsets_data_size", _input_rowsets_data_size) + .tag("input_rowsets_index_size", _input_rowsets_index_size) + .tag("input_rowsets_total_size", _input_rowsets_total_size) .tag("tablet_max_version", cloud_tablet()->max_version_unlocked()) .tag("cumulative_point", cloud_tablet()->cumulative_layer_point()) .tag("num_rowsets", cloud_tablet()->fetch_add_approximate_num_rowsets(0)) @@ -201,10 +205,14 @@ Status CloudCumulativeCompaction::execute_compact() { .tag("input_rowsets", _input_rowsets.size()) .tag("input_rows", _input_row_num) .tag("input_segments", _input_segments) - .tag("input_data_size", _input_rowsets_size) + .tag("input_rowsets_data_size", _input_rowsets_data_size) + .tag("input_rowsets_index_size", _input_rowsets_index_size) + .tag("input_rowsets_total_size", _input_rowsets_total_size) .tag("output_rows", _output_rowset->num_rows()) .tag("output_segments", _output_rowset->num_segments()) - .tag("output_data_size", _output_rowset->data_disk_size()) + .tag("output_rowset_data_size", _output_rowset->data_disk_size()) + .tag("output_rowset_index_size", _output_rowset->index_disk_size()) + .tag("output_rowset_total_size", _output_rowset->total_disk_size()) .tag("tablet_max_version", _tablet->max_version_unlocked()) .tag("cumulative_point", cloud_tablet()->cumulative_layer_point()) .tag("num_rowsets", cloud_tablet()->fetch_add_approximate_num_rowsets(0)) @@ -213,8 +221,9 @@ Status CloudCumulativeCompaction::execute_compact() { _state = CompactionState::SUCCESS; DorisMetrics::instance()->cumulative_compaction_deltas_total->increment(_input_rowsets.size()); - DorisMetrics::instance()->cumulative_compaction_bytes_total->increment(_input_rowsets_size); - cumu_output_size << _output_rowset->data_disk_size(); + DorisMetrics::instance()->cumulative_compaction_bytes_total->increment( + _input_rowsets_total_size); + cumu_output_size << _output_rowset->total_disk_size(); return Status::OK(); } @@ -243,8 +252,8 @@ Status CloudCumulativeCompaction::modify_rowsets() { compaction_job->set_output_cumulative_point(new_cumulative_point); compaction_job->set_num_input_rows(_input_row_num); compaction_job->set_num_output_rows(_output_rowset->num_rows()); - compaction_job->set_size_input_rowsets(_input_rowsets_size); - compaction_job->set_size_output_rowsets(_output_rowset->data_disk_size()); + compaction_job->set_size_input_rowsets(_input_rowsets_total_size); + compaction_job->set_size_output_rowsets(_output_rowset->total_disk_size()); compaction_job->set_num_input_segments(_input_segments); compaction_job->set_num_output_segments(_output_rowset->num_segments()); compaction_job->set_num_input_rowsets(_input_rowsets.size()); diff --git a/be/src/cloud/cloud_cumulative_compaction_policy.cpp b/be/src/cloud/cloud_cumulative_compaction_policy.cpp index f9af469e56f..5a9879387b2 100644 --- a/be/src/cloud/cloud_cumulative_compaction_policy.cpp +++ b/be/src/cloud/cloud_cumulative_compaction_policy.cpp @@ -209,7 +209,7 @@ int64_t CloudSizeBasedCumulativeCompactionPolicy::new_cumulative_point( // if rowsets have no delete version, check output_rowset total disk size satisfies promotion size. return output_rowset->start_version() == last_cumulative_point && (last_delete_version.first != -1 || - output_rowset->data_disk_size() >= cloud_promotion_size(tablet) || + output_rowset->total_disk_size() >= cloud_promotion_size(tablet) || satisfy_promotion_version) ? output_rowset->end_version() + 1 : last_cumulative_point; diff --git a/be/src/cloud/cloud_full_compaction.cpp b/be/src/cloud/cloud_full_compaction.cpp index f22c449223c..c27b728c93d 100644 --- a/be/src/cloud/cloud_full_compaction.cpp +++ b/be/src/cloud/cloud_full_compaction.cpp @@ -98,7 +98,9 @@ Status CloudFullCompaction::prepare_compact() { for (auto& rs : _input_rowsets) { _input_row_num += rs->num_rows(); _input_segments += rs->num_segments(); - _input_rowsets_size += rs->data_disk_size(); + _input_rowsets_data_size += rs->data_disk_size(); + _input_rowsets_index_size += rs->index_disk_size(); + _input_rowsets_total_size += rs->total_disk_size(); } LOG_INFO("start CloudFullCompaction, tablet_id={}, range=[{}-{}]", _tablet->tablet_id(), _input_rowsets.front()->start_version(), _input_rowsets.back()->end_version()) @@ -106,7 +108,9 @@ Status CloudFullCompaction::prepare_compact() { .tag("input_rowsets", _input_rowsets.size()) .tag("input_rows", _input_row_num) .tag("input_segments", _input_segments) - .tag("input_data_size", _input_rowsets_size); + .tag("input_rowsets_data_size", _input_rowsets_data_size) + .tag("input_rowsets_index_size", _input_rowsets_index_size) + .tag("input_rowsets_total_size", _input_rowsets_total_size); return st; } @@ -162,16 +166,20 @@ Status CloudFullCompaction::execute_compact() { .tag("input_rowsets", _input_rowsets.size()) .tag("input_rows", _input_row_num) .tag("input_segments", _input_segments) - .tag("input_data_size", _input_rowsets_size) + .tag("input_rowsets_data_size", _input_rowsets_data_size) + .tag("input_rowsets_index_size", _input_rowsets_index_size) + .tag("input_rowsets_total_size", _input_rowsets_total_size) .tag("output_rows", _output_rowset->num_rows()) .tag("output_segments", _output_rowset->num_segments()) - .tag("output_data_size", _output_rowset->data_disk_size()); + .tag("output_rowset_data_size", _output_rowset->data_disk_size()) + .tag("output_rowset_index_size", _output_rowset->index_disk_size()) + .tag("output_rowset_total_size", _output_rowset->total_disk_size()); _state = CompactionState::SUCCESS; DorisMetrics::instance()->full_compaction_deltas_total->increment(_input_rowsets.size()); - DorisMetrics::instance()->full_compaction_bytes_total->increment(_input_rowsets_size); - full_output_size << _output_rowset->data_disk_size(); + DorisMetrics::instance()->full_compaction_bytes_total->increment(_input_rowsets_total_size); + full_output_size << _output_rowset->total_disk_size(); return Status::OK(); } @@ -193,8 +201,8 @@ Status CloudFullCompaction::modify_rowsets() { compaction_job->set_output_cumulative_point(_output_rowset->end_version() + 1); compaction_job->set_num_input_rows(_input_row_num); compaction_job->set_num_output_rows(_output_rowset->num_rows()); - compaction_job->set_size_input_rowsets(_input_rowsets_size); - compaction_job->set_size_output_rowsets(_output_rowset->data_disk_size()); + compaction_job->set_size_input_rowsets(_input_rowsets_total_size); + compaction_job->set_size_output_rowsets(_output_rowset->total_disk_size()); DBUG_EXECUTE_IF("CloudFullCompaction::modify_rowsets.wrong_compaction_data_size", { compaction_job->set_size_input_rowsets(1); compaction_job->set_size_output_rowsets(10000001); @@ -345,7 +353,7 @@ Status CloudFullCompaction::_cloud_full_compaction_update_delete_bitmap(int64_t .tag("input_rowsets", _input_rowsets.size()) .tag("input_rows", _input_row_num) .tag("input_segments", _input_segments) - .tag("input_data_size", _input_rowsets_size) + .tag("input_rowsets_total_size", _input_rowsets_total_size) .tag("update_bitmap_size", delete_bitmap->delete_bitmap.size()); _tablet->tablet_meta()->delete_bitmap().merge(*delete_bitmap); return Status::OK(); diff --git a/be/src/cloud/cloud_meta_mgr.cpp b/be/src/cloud/cloud_meta_mgr.cpp index 7dc9a4f11a1..57f3c7f8009 100644 --- a/be/src/cloud/cloud_meta_mgr.cpp +++ b/be/src/cloud/cloud_meta_mgr.cpp @@ -27,6 +27,7 @@ #include <algorithm> #include <atomic> #include <chrono> +#include <cstdint> #include <memory> #include <mutex> #include <random> @@ -51,6 +52,7 @@ #include "olap/olap_common.h" #include "olap/rowset/rowset.h" #include "olap/rowset/rowset_factory.h" +#include "olap/rowset/rowset_fwd.h" #include "olap/storage_engine.h" #include "olap/tablet_meta.h" #include "runtime/client_cache.h" @@ -750,6 +752,7 @@ Status CloudMetaMgr::commit_rowset(const RowsetMeta& rs_meta, Status ret_st; TEST_INJECTION_POINT_RETURN_WITH_VALUE("CloudMetaMgr::commit_rowset", ret_st); } + check_table_size_correctness(rs_meta); CreateRowsetRequest req; CreateRowsetResponse resp; req.set_cloud_unique_id(config::cloud_unique_id); @@ -1125,4 +1128,124 @@ Status CloudMetaMgr::remove_old_version_delete_bitmap( return st; } +void CloudMetaMgr::check_table_size_correctness(const RowsetMeta& rs_meta) { + if (!config::enable_table_size_correctness_check) { + return; + } + int64_t total_segment_size = get_segment_file_size(rs_meta); + int64_t total_inverted_index_size = get_inverted_index_file_szie(rs_meta); + if (rs_meta.data_disk_size() != total_segment_size || + rs_meta.index_disk_size() != total_inverted_index_size || + rs_meta.data_disk_size() + rs_meta.index_disk_size() != rs_meta.total_disk_size()) { + LOG(WARNING) << "[Cloud table table size check failed]:" + << " tablet id: " << rs_meta.tablet_id() + << ", rowset id:" << rs_meta.rowset_id() + << ", rowset data disk size:" << rs_meta.data_disk_size() + << ", rowset real data disk size:" << total_segment_size + << ", rowset index disk size:" << rs_meta.index_disk_size() + << ", rowset real index disk size:" << total_inverted_index_size + << ", rowset total disk size:" << rs_meta.total_disk_size() + << ", rowset segment path:" + << StorageResource().remote_segment_path(rs_meta.tablet_id(), + rs_meta.rowset_id().to_string(), 0); + DCHECK(false); + } +} + +int64_t CloudMetaMgr::get_segment_file_size(const RowsetMeta& rs_meta) { + int64_t total_segment_size = 0; + const auto fs = const_cast<RowsetMeta&>(rs_meta).fs(); + if (!fs) { + LOG(WARNING) << "get fs failed, resource_id={}" << rs_meta.resource_id(); + } + for (int64_t seg_id = 0; seg_id < rs_meta.num_segments(); seg_id++) { + std::string segment_path = StorageResource().remote_segment_path( + rs_meta.tablet_id(), rs_meta.rowset_id().to_string(), seg_id); + int64_t segment_file_size = 0; + auto st = fs->file_size(segment_path, &segment_file_size); + if (!st.ok()) { + segment_file_size = 0; + if (st.is<FILE_NOT_EXIST>()) { + LOG(INFO) << "cloud table size correctness check get segment size 0 because " + "file not exist! msg:" + << st.msg() << ", segment path:" << segment_path; + } else { + LOG(WARNING) << "cloud table size correctness check get segment size failed! msg:" + << st.msg() << ", segment path:" << segment_path; + } + } + total_segment_size += segment_file_size; + } + return total_segment_size; +} + +int64_t CloudMetaMgr::get_inverted_index_file_szie(const RowsetMeta& rs_meta) { + int64_t total_inverted_index_size = 0; + const auto fs = const_cast<RowsetMeta&>(rs_meta).fs(); + if (!fs) { + LOG(WARNING) << "get fs failed, resource_id={}" << rs_meta.resource_id(); + } + if (rs_meta.tablet_schema()->get_inverted_index_storage_format() == + InvertedIndexStorageFormatPB::V1) { + auto indices = rs_meta.tablet_schema()->indexes(); + for (auto& index : indices) { + // only get file_size for inverted index + if (index.index_type() != IndexType::INVERTED) { + continue; + } + for (int seg_id = 0; seg_id < rs_meta.num_segments(); ++seg_id) { + std::string segment_path = StorageResource().remote_segment_path( + rs_meta.tablet_id(), rs_meta.rowset_id().to_string(), seg_id); + int64_t file_size = 0; + + std::string inverted_index_file_path = + InvertedIndexDescriptor::get_index_file_path_v1( + InvertedIndexDescriptor::get_index_file_path_prefix(segment_path), + index.index_id(), index.get_index_suffix()); + auto st = fs->file_size(inverted_index_file_path, &file_size); + if (!st.ok()) { + file_size = 0; + if (st.is<FILE_NOT_EXIST>()) { + LOG(INFO) << "cloud table size correctness check get inverted index v1 " + "0 because file not exist! msg:" + << st.msg() + << ", inverted index path:" << inverted_index_file_path; + } else { + LOG(WARNING) + << "cloud table size correctness check get inverted index v1 " + "size failed! msg:" + << st.msg() << ", inverted index path:" << inverted_index_file_path; + } + } + total_inverted_index_size += file_size; + } + } + } else { + for (int seg_id = 0; seg_id < rs_meta.num_segments(); ++seg_id) { + int64_t file_size = 0; + std::string segment_path = StorageResource().remote_segment_path( + rs_meta.tablet_id(), rs_meta.rowset_id().to_string(), seg_id); + + std::string inverted_index_file_path = InvertedIndexDescriptor::get_index_file_path_v2( + InvertedIndexDescriptor::get_index_file_path_prefix(segment_path)); + auto st = fs->file_size(inverted_index_file_path, &file_size); + if (!st.ok()) { + file_size = 0; + if (st.is<FILE_NOT_EXIST>()) { + LOG(INFO) << "cloud table size correctness check get inverted index v2 " + "0 because file not exist! msg:" + << st.msg() << ", inverted index path:" << inverted_index_file_path; + } else { + LOG(WARNING) << "cloud table size correctness check get inverted index v2 " + "size failed! msg:" + << st.msg() + << ", inverted index path:" << inverted_index_file_path; + } + } + total_inverted_index_size += file_size; + } + } + return total_inverted_index_size; +} + } // namespace doris::cloud diff --git a/be/src/cloud/cloud_meta_mgr.h b/be/src/cloud/cloud_meta_mgr.h index 79cdb3fd3d1..a48381f056e 100644 --- a/be/src/cloud/cloud_meta_mgr.h +++ b/be/src/cloud/cloud_meta_mgr.h @@ -113,6 +113,9 @@ private: Status sync_tablet_delete_bitmap(CloudTablet* tablet, int64_t old_max_version, std::ranges::range auto&& rs_metas, const TabletStatsPB& stats, const TabletIndexPB& idx, DeleteBitmap* delete_bitmap); + void check_table_size_correctness(const RowsetMeta& rs_meta); + int64_t get_segment_file_size(const RowsetMeta& rs_meta); + int64_t get_inverted_index_file_szie(const RowsetMeta& rs_meta); }; } // namespace cloud diff --git a/be/src/cloud/cloud_rowset_builder.cpp b/be/src/cloud/cloud_rowset_builder.cpp index 192da0f17ef..2e6764b33aa 100644 --- a/be/src/cloud/cloud_rowset_builder.cpp +++ b/be/src/cloud/cloud_rowset_builder.cpp @@ -106,7 +106,7 @@ void CloudRowsetBuilder::update_tablet_stats() { tablet->fetch_add_approximate_num_rowsets(1); tablet->fetch_add_approximate_num_segments(_rowset->num_segments()); tablet->fetch_add_approximate_num_rows(_rowset->num_rows()); - tablet->fetch_add_approximate_data_size(_rowset->data_disk_size()); + tablet->fetch_add_approximate_data_size(_rowset->total_disk_size()); tablet->fetch_add_approximate_cumu_num_rowsets(1); tablet->fetch_add_approximate_cumu_num_deltas(_rowset->num_segments()); tablet->write_count.fetch_add(1, std::memory_order_relaxed); diff --git a/be/src/cloud/cloud_schema_change_job.cpp b/be/src/cloud/cloud_schema_change_job.cpp index b7e3be93e85..896804578d7 100644 --- a/be/src/cloud/cloud_schema_change_job.cpp +++ b/be/src/cloud/cloud_schema_change_job.cpp @@ -344,7 +344,7 @@ Status CloudSchemaChangeJob::_convert_historical_rowsets(const SchemaChangeParam sc_job->add_txn_ids(rs->txn_id()); sc_job->add_output_versions(rs->end_version()); num_output_rows += rs->num_rows(); - size_output_rowsets += rs->data_disk_size(); + size_output_rowsets += rs->total_disk_size(); num_output_segments += rs->num_segments(); } sc_job->set_num_output_rows(num_output_rows); diff --git a/be/src/cloud/cloud_tablet.cpp b/be/src/cloud/cloud_tablet.cpp index b944db87030..d3b131d055d 100644 --- a/be/src/cloud/cloud_tablet.cpp +++ b/be/src/cloud/cloud_tablet.cpp @@ -412,7 +412,7 @@ int CloudTablet::delete_expired_stale_rowsets() { void CloudTablet::update_base_size(const Rowset& rs) { // Define base rowset as the rowset of version [2-x] if (rs.start_version() == 2) { - _base_size = rs.data_disk_size(); + _base_size = rs.total_disk_size(); } } diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 2fdebbd09c2..d8308c7eb97 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1356,6 +1356,8 @@ DEFINE_mInt32(check_score_rounds_num, "1000"); DEFINE_Int32(query_cache_size, "512"); DEFINE_mBool(enable_delete_bitmap_merge_on_compaction, "false"); +// Enable validation to check the correctness of table size. +DEFINE_Bool(enable_table_size_correctness_check, "false"); // clang-format off #ifdef BE_TEST diff --git a/be/src/common/config.h b/be/src/common/config.h index 791ca0b5e1a..f827e0f7dae 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -1443,6 +1443,8 @@ DECLARE_mInt32(check_score_rounds_num); DECLARE_Int32(query_cache_size); DECLARE_mBool(enable_delete_bitmap_merge_on_compaction); +// Enable validation to check the correctness of table size. +DECLARE_Bool(enable_table_size_correctness_check); #ifdef BE_TEST // test s3 diff --git a/be/src/olap/base_compaction.cpp b/be/src/olap/base_compaction.cpp index 8be29383c1e..8b9cbd75ed3 100644 --- a/be/src/olap/base_compaction.cpp +++ b/be/src/olap/base_compaction.cpp @@ -80,7 +80,7 @@ Status BaseCompaction::execute_compact() { tablet()->set_last_base_compaction_success_time(UnixMillis()); DorisMetrics::instance()->base_compaction_deltas_total->increment(_input_rowsets.size()); - DorisMetrics::instance()->base_compaction_bytes_total->increment(_input_rowsets_size); + DorisMetrics::instance()->base_compaction_bytes_total->increment(_input_rowsets_total_size); return Status::OK(); } diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index 14769bc315a..a76a5d8679d 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -257,10 +257,10 @@ int64_t Compaction::get_avg_segment_rows() { if (meta->compaction_policy() == CUMULATIVE_TIME_SERIES_POLICY) { int64_t compaction_goal_size_mbytes = meta->time_series_compaction_goal_size_mbytes(); return (compaction_goal_size_mbytes * 1024 * 1024 * 2) / - (_input_rowsets_size / (_input_row_num + 1) + 1); + (_input_rowsets_data_size / (_input_row_num + 1) + 1); } return config::vertical_compaction_max_segment_size / - (_input_rowsets_size / (_input_row_num + 1) + 1); + (_input_rowsets_data_size / (_input_row_num + 1) + 1); } CompactionMixin::CompactionMixin(StorageEngine& engine, TabletSharedPtr tablet, @@ -305,9 +305,9 @@ Status CompactionMixin::do_compact_ordered_rowsets() { // build output rowset RowsetMetaSharedPtr rowset_meta = std::make_shared<RowsetMeta>(); rowset_meta->set_num_rows(_input_row_num); - rowset_meta->set_total_disk_size(_input_rowsets_size); - rowset_meta->set_data_disk_size(_input_rowsets_size); - rowset_meta->set_index_disk_size(_input_index_size); + rowset_meta->set_total_disk_size(_input_rowsets_data_size + _input_rowsets_index_size); + rowset_meta->set_data_disk_size(_input_rowsets_data_size); + rowset_meta->set_index_disk_size(_input_rowsets_index_size); rowset_meta->set_empty(_input_row_num == 0); rowset_meta->set_num_segments(_input_num_segments); rowset_meta->set_segments_overlap(NONOVERLAPPING); @@ -320,12 +320,13 @@ Status CompactionMixin::do_compact_ordered_rowsets() { void CompactionMixin::build_basic_info() { for (auto& rowset : _input_rowsets) { - _input_rowsets_size += rowset->data_disk_size(); - _input_index_size += rowset->index_disk_size(); + _input_rowsets_data_size += rowset->data_disk_size(); + _input_rowsets_index_size += rowset->index_disk_size(); + _input_rowsets_total_size += rowset->total_disk_size(); _input_row_num += rowset->num_rows(); _input_num_segments += rowset->num_segments(); } - COUNTER_UPDATE(_input_rowsets_data_size_counter, _input_rowsets_size); + COUNTER_UPDATE(_input_rowsets_data_size_counter, _input_rowsets_data_size); COUNTER_UPDATE(_input_row_num_counter, _input_row_num); COUNTER_UPDATE(_input_segments_num_counter, _input_num_segments); @@ -444,8 +445,12 @@ Status CompactionMixin::execute_compact_impl(int64_t permits) { << ", disk=" << tablet()->data_dir()->path() << ", segments=" << _input_num_segments << ", input_row_num=" << _input_row_num << ", output_row_num=" << _output_rowset->num_rows() - << ", input_rowset_size=" << _input_rowsets_size - << ", output_rowset_size=" << _output_rowset->data_disk_size() + << ", input_rowsets_data_size=" << _input_rowsets_data_size + << ", input_rowsets_index_size=" << _input_rowsets_index_size + << ", input_rowsets_total_size=" << _input_rowsets_total_size + << ", output_rowset_data_size=" << _output_rowset->data_disk_size() + << ", output_rowset_index_size=" << _output_rowset->index_disk_size() + << ", output_rowset_total_size=" << _output_rowset->total_disk_size() << ". elapsed time=" << watch.get_elapse_second() << "s."; _state = CompactionState::SUCCESS; return Status::OK(); @@ -467,8 +472,8 @@ Status CompactionMixin::execute_compact_impl(int64_t permits) { << ". tablet=" << _tablet->tablet_id() << ", output_version=" << _output_version << ", current_max_version=" << tablet()->max_version().second << ", disk=" << tablet()->data_dir()->path() << ", segments=" << _input_num_segments - << ", input_rowset_size=" << _input_rowsets_size - << ", output_rowset_size=" << _output_rowset->data_disk_size() + << ", input_data_size=" << _input_rowsets_data_size + << ", output_rowset_size=" << _output_rowset->total_disk_size() << ", input_row_num=" << _input_row_num << ", output_row_num=" << _output_rowset->num_rows() << ", filtered_row_num=" << _stats.filtered_rows diff --git a/be/src/olap/compaction.h b/be/src/olap/compaction.h index 13a37beca19..06ef4268529 100644 --- a/be/src/olap/compaction.h +++ b/be/src/olap/compaction.h @@ -90,10 +90,11 @@ protected: BaseTabletSPtr _tablet; std::vector<RowsetSharedPtr> _input_rowsets; - int64_t _input_rowsets_size {0}; + int64_t _input_rowsets_data_size {0}; + int64_t _input_rowsets_index_size {0}; + int64_t _input_rowsets_total_size {0}; int64_t _input_row_num {0}; int64_t _input_num_segments {0}; - int64_t _input_index_size {0}; Merger::Statistics _stats; diff --git a/be/src/olap/cumulative_compaction.cpp b/be/src/olap/cumulative_compaction.cpp index b762468b345..b961c694ede 100644 --- a/be/src/olap/cumulative_compaction.cpp +++ b/be/src/olap/cumulative_compaction.cpp @@ -125,7 +125,8 @@ Status CumulativeCompaction::execute_compact() { tablet()->set_last_cumu_compaction_success_time(UnixMillis()); } DorisMetrics::instance()->cumulative_compaction_deltas_total->increment(_input_rowsets.size()); - DorisMetrics::instance()->cumulative_compaction_bytes_total->increment(_input_rowsets_size); + DorisMetrics::instance()->cumulative_compaction_bytes_total->increment( + _input_rowsets_total_size); return Status::OK(); } diff --git a/be/src/olap/rowset/beta_rowset.cpp b/be/src/olap/rowset/beta_rowset.cpp index ee1605a3043..4b51dcc3530 100644 --- a/be/src/olap/rowset/beta_rowset.cpp +++ b/be/src/olap/rowset/beta_rowset.cpp @@ -498,7 +498,7 @@ Status BetaRowset::upload_to(const StorageResource& dest_fs, const RowsetId& new auto st = dest_fs.fs->batch_upload(local_paths, dest_paths); if (st.ok()) { DorisMetrics::instance()->upload_rowset_count->increment(1); - DorisMetrics::instance()->upload_total_byte->increment(data_disk_size()); + DorisMetrics::instance()->upload_total_byte->increment(total_disk_size()); } else { DorisMetrics::instance()->upload_fail_count->increment(1); } diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp index 3f60e7c5674..548b1950b81 100644 --- a/be/src/olap/rowset/beta_rowset_writer.cpp +++ b/be/src/olap/rowset/beta_rowset_writer.cpp @@ -81,7 +81,7 @@ void build_rowset_meta_with_spec_field(RowsetMeta& rowset_meta, const RowsetMeta& spec_rowset_meta) { rowset_meta.set_num_rows(spec_rowset_meta.num_rows()); rowset_meta.set_total_disk_size(spec_rowset_meta.total_disk_size()); - rowset_meta.set_data_disk_size(spec_rowset_meta.total_disk_size()); + rowset_meta.set_data_disk_size(spec_rowset_meta.data_disk_size()); rowset_meta.set_index_disk_size(spec_rowset_meta.index_disk_size()); // TODO write zonemap to meta rowset_meta.set_empty(spec_rowset_meta.num_rows() == 0); @@ -886,7 +886,8 @@ Status BaseBetaRowsetWriter::_build_rowset_meta(RowsetMeta* rowset_meta, bool ch rowset_meta->set_num_segments(segment_num); rowset_meta->set_num_rows(num_rows_written + _num_rows_written); - rowset_meta->set_total_disk_size(total_data_size + _total_data_size); + rowset_meta->set_total_disk_size(total_data_size + _total_data_size + total_index_size + + _total_index_size); rowset_meta->set_data_disk_size(total_data_size + _total_data_size); rowset_meta->set_index_disk_size(total_index_size + _total_index_size); rowset_meta->set_segments_key_bounds(segments_encoded_key_bounds); @@ -1089,8 +1090,8 @@ Status BetaRowsetWriter::flush_segment_writer_for_segcompaction( SegmentStatistics segstat; segstat.row_num = row_num; - segstat.data_size = segment_size + inverted_index_file_size; - segstat.index_size = index_size + inverted_index_file_size; + segstat.data_size = segment_size; + segstat.index_size = inverted_index_file_size; segstat.key_bounds = key_bounds; { std::lock_guard<std::mutex> lock(_segid_statistics_map_mutex); diff --git a/be/src/olap/rowset/beta_rowset_writer.h b/be/src/olap/rowset/beta_rowset_writer.h index 47e12a531e9..4539959fab5 100644 --- a/be/src/olap/rowset/beta_rowset_writer.h +++ b/be/src/olap/rowset/beta_rowset_writer.h @@ -223,7 +223,6 @@ protected: RETURN_NOT_OK_STATUS_WITH_WARN(_idx_files.close(), "failed to close index file when build new rowset"); this->_total_index_size += _idx_files.get_total_index_size(); - this->_total_data_size += _idx_files.get_total_index_size(); return Status::OK(); } diff --git a/be/src/olap/rowset/rowset.h b/be/src/olap/rowset/rowset.h index 24e660cd2f7..e1a2347f6ae 100644 --- a/be/src/olap/rowset/rowset.h +++ b/be/src/olap/rowset/rowset.h @@ -149,7 +149,8 @@ public: int64_t start_version() const { return rowset_meta()->version().first; } int64_t end_version() const { return rowset_meta()->version().second; } size_t index_disk_size() const { return rowset_meta()->index_disk_size(); } - size_t data_disk_size() const { return rowset_meta()->total_disk_size(); } + size_t data_disk_size() const { return rowset_meta()->data_disk_size(); } + size_t total_disk_size() const { return rowset_meta()->total_disk_size(); } bool empty() const { return rowset_meta()->empty(); } bool zero_num_rows() const { return rowset_meta()->num_rows() == 0; } size_t num_rows() const { return rowset_meta()->num_rows(); } diff --git a/be/src/olap/rowset/rowset_meta.cpp b/be/src/olap/rowset/rowset_meta.cpp index 1571105fa73..6bed5e800ed 100644 --- a/be/src/olap/rowset/rowset_meta.cpp +++ b/be/src/olap/rowset/rowset_meta.cpp @@ -226,6 +226,7 @@ void RowsetMeta::merge_rowset_meta(const RowsetMeta& other) { set_data_disk_size(data_disk_size() + other.data_disk_size()); set_total_disk_size(total_disk_size() + other.total_disk_size()); set_index_disk_size(index_disk_size() + other.index_disk_size()); + set_total_disk_size(data_disk_size() + index_disk_size()); for (auto&& key_bound : other.get_segments_key_bounds()) { add_segment_key_bounds(key_bound); } diff --git a/be/src/olap/rowset/segment_creator.cpp b/be/src/olap/rowset/segment_creator.cpp index 5f4a3dce7b8..e0eb7534123 100644 --- a/be/src/olap/rowset/segment_creator.cpp +++ b/be/src/olap/rowset/segment_creator.cpp @@ -225,9 +225,9 @@ Status SegmentFlusher::_flush_segment_writer( if (row_num == 0) { return Status::OK(); } - uint64_t segment_size; - uint64_t index_size; - Status s = writer->finalize(&segment_size, &index_size); + uint64_t segment_file_size; + uint64_t common_index_size; + Status s = writer->finalize(&segment_file_size, &common_index_size); if (!s.ok()) { return Status::Error(s.code(), "failed to finalize segment: {}", s.to_string()); } @@ -249,16 +249,20 @@ Status SegmentFlusher::_flush_segment_writer( uint32_t segment_id = writer->segment_id(); SegmentStatistics segstat; segstat.row_num = row_num; - segstat.data_size = segment_size + inverted_index_file_size; - segstat.index_size = index_size + inverted_index_file_size; + segstat.data_size = segment_file_size; + segstat.index_size = inverted_index_file_size; segstat.key_bounds = key_bounds; + LOG(INFO) << "tablet_id:" << _context.tablet_id + << ", flushing rowset_dir: " << _context.tablet_path + << ", rowset_id:" << _context.rowset_id << ", data size:" << segstat.data_size + << ", index size:" << segstat.index_size; writer.reset(); RETURN_IF_ERROR(_context.segment_collector->add(segment_id, segstat, flush_schema)); if (flush_size) { - *flush_size = segment_size + inverted_index_file_size; + *flush_size = segment_file_size; } return Status::OK(); } @@ -274,9 +278,9 @@ Status SegmentFlusher::_flush_segment_writer(std::unique_ptr<segment_v2::Segment if (row_num == 0) { return Status::OK(); } - uint64_t segment_size; - uint64_t index_size; - Status s = writer->finalize(&segment_size, &index_size); + uint64_t segment_file_size; + uint64_t common_index_size; + Status s = writer->finalize(&segment_file_size, &common_index_size); if (!s.ok()) { return Status::Error(s.code(), "failed to finalize segment: {}", s.to_string()); } @@ -298,16 +302,20 @@ Status SegmentFlusher::_flush_segment_writer(std::unique_ptr<segment_v2::Segment uint32_t segment_id = writer->get_segment_id(); SegmentStatistics segstat; segstat.row_num = row_num; - segstat.data_size = segment_size + inverted_index_file_size; - segstat.index_size = index_size + inverted_index_file_size; + segstat.data_size = segment_file_size; + segstat.index_size = inverted_index_file_size; segstat.key_bounds = key_bounds; + LOG(INFO) << "tablet_id:" << _context.tablet_id + << ", flushing rowset_dir: " << _context.tablet_path + << ", rowset_id:" << _context.rowset_id << ", data size:" << segstat.data_size + << ", index size:" << segstat.index_size; writer.reset(); RETURN_IF_ERROR(_context.segment_collector->add(segment_id, segstat, flush_schema)); if (flush_size) { - *flush_size = segment_size + inverted_index_file_size; + *flush_size = segment_file_size; } return Status::OK(); } diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp index fb8f6622685..46070f8dccd 100644 --- a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp +++ b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp @@ -138,7 +138,6 @@ Status VerticalBetaRowsetWriter<T>::_flush_columns(segment_v2::SegmentWriter* se this->_segment_num_rows.resize(_cur_writer_idx + 1); this->_segment_num_rows[_cur_writer_idx] = _segment_writers[_cur_writer_idx]->row_count(); } - this->_total_index_size += static_cast<int64_t>(index_size); return Status::OK(); } diff --git a/be/src/olap/single_replica_compaction.cpp b/be/src/olap/single_replica_compaction.cpp index ef93ab25cae..7470afe0ef6 100644 --- a/be/src/olap/single_replica_compaction.cpp +++ b/be/src/olap/single_replica_compaction.cpp @@ -149,11 +149,15 @@ Status SingleReplicaCompaction::_do_single_replica_compaction_impl() { LOG(INFO) << "succeed to do single replica compaction" << ". tablet=" << _tablet->tablet_id() << ", output_version=" << _output_version << ", current_max_version=" << current_max_version - << ", input_rowset_size=" << _input_rowsets_size + << ", input_rowsets_data_size=" << _input_rowsets_data_size + << ", input_rowsets_index_size=" << _input_rowsets_index_size + << ", input_rowsets_total_size=" << _input_rowsets_total_size << ", input_row_num=" << _input_row_num << ", input_segments_num=" << _input_num_segments - << ", _input_index_size=" << _input_index_size + << ", _input_index_size=" << _input_rowsets_index_size << ", output_rowset_data_size=" << _output_rowset->data_disk_size() + << ", output_rowset_index_size=" << _output_rowset->index_disk_size() + << ", output_rowset_total_size=" << _output_rowset->total_disk_size() << ", output_row_num=" << _output_rowset->num_rows() << ", output_segments_num=" << _output_rowset->num_segments(); return Status::OK(); @@ -264,10 +268,11 @@ bool SingleReplicaCompaction::_find_rowset_to_fetch(const std::vector<Version>& return false; } for (auto& rowset : _input_rowsets) { - _input_rowsets_size += rowset->data_disk_size(); + _input_rowsets_data_size += rowset->data_disk_size(); _input_row_num += rowset->num_rows(); _input_num_segments += rowset->num_segments(); - _input_index_size += rowset->index_disk_size(); + _input_rowsets_index_size += rowset->index_disk_size(); + _input_rowsets_total_size += rowset->data_disk_size() + rowset->index_disk_size(); } _output_version = *proper_version; } diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index 450f3d2cb8b..7c69ba54831 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -26,6 +26,7 @@ #include <gen_cpp/Metrics_types.h> #include <gen_cpp/olap_file.pb.h> #include <gen_cpp/types.pb.h> +#include <glog/logging.h> #include <rapidjson/document.h> #include <rapidjson/encodings.h> #include <rapidjson/prettywriter.h> @@ -35,6 +36,7 @@ #include <algorithm> #include <atomic> #include <boost/container/detail/std_fwd.hpp> +#include <cstdint> #include <roaring/roaring.hh> #include "common/compiler_util.h" // IWYU pragma: keep @@ -86,6 +88,7 @@ #include "olap/rowset/beta_rowset.h" #include "olap/rowset/rowset.h" #include "olap/rowset/rowset_factory.h" +#include "olap/rowset/rowset_fwd.h" #include "olap/rowset/rowset_meta.h" #include "olap/rowset/rowset_meta_manager.h" #include "olap/rowset/rowset_writer.h" @@ -329,6 +332,7 @@ Status Tablet::init() { // should save tablet meta to remote meta store // if it's a primary replica void Tablet::save_meta() { + check_table_size_correctness(); auto res = _tablet_meta->save_meta(_data_dir); CHECK_EQ(res, Status::OK()) << "fail to save tablet_meta. res=" << res << ", root=" << _data_dir->path(); @@ -2047,8 +2051,8 @@ Status Tablet::_cooldown_data(RowsetSharedPtr rowset) { LOG(INFO) << "Upload rowset " << old_rowset->version() << " " << new_rowset_id.to_string() << " to " << storage_resource.fs->root_path().native() << ", tablet_id=" << tablet_id() << ", duration=" << duration.count() - << ", capacity=" << old_rowset->data_disk_size() - << ", tp=" << old_rowset->data_disk_size() / duration.count() + << ", capacity=" << old_rowset->total_disk_size() + << ", tp=" << old_rowset->total_disk_size() / duration.count() << ", old rowset_id=" << old_rowset->rowset_id().to_string(); // gen a new rowset @@ -2427,7 +2431,7 @@ RowsetSharedPtr Tablet::need_cooldown(int64_t* cooldown_timestamp, size_t* file_ // current time or it's datatime is less than current time if (newest_cooldown_time != 0 && newest_cooldown_time < UnixSeconds()) { *cooldown_timestamp = newest_cooldown_time; - *file_size = rowset->data_disk_size(); + *file_size = rowset->total_disk_size(); VLOG_DEBUG << "tablet need cooldown, tablet id: " << tablet_id() << " file_size: " << *file_size; return rowset; @@ -2737,4 +2741,120 @@ void Tablet::clear_cache() { } } +void Tablet::check_table_size_correctness() { + if (!config::enable_table_size_correctness_check) { + return; + } + const std::vector<RowsetMetaSharedPtr>& all_rs_metas = _tablet_meta->all_rs_metas(); + for (const auto& rs_meta : all_rs_metas) { + int64_t total_segment_size = get_segment_file_size(rs_meta); + int64_t total_inverted_index_size = get_inverted_index_file_szie(rs_meta); + if (rs_meta->data_disk_size() != total_segment_size || + rs_meta->index_disk_size() != total_inverted_index_size || + rs_meta->data_disk_size() + rs_meta->index_disk_size() != rs_meta->total_disk_size()) { + LOG(WARNING) << "[Local table table size check failed]:" + << " tablet id: " << rs_meta->tablet_id() + << ", rowset id:" << rs_meta->rowset_id() + << ", rowset data disk size:" << rs_meta->data_disk_size() + << ", rowset real data disk size:" << total_segment_size + << ", rowset index disk size:" << rs_meta->index_disk_size() + << ", rowset real index disk size:" << total_inverted_index_size + << ", rowset total disk size:" << rs_meta->total_disk_size() + << ", rowset segment path:" + << StorageResource().remote_segment_path( + rs_meta->tablet_id(), rs_meta->rowset_id().to_string(), 0); + DCHECK(false); + } + } +} + +std::string Tablet::get_segment_path(const RowsetMetaSharedPtr& rs_meta, int64_t seg_id) { + std::string segment_path; + if (rs_meta->is_local()) { + segment_path = local_segment_path(_tablet_path, rs_meta->rowset_id().to_string(), seg_id); + } else { + segment_path = rs_meta->remote_storage_resource().value()->remote_segment_path( + rs_meta->tablet_id(), rs_meta->rowset_id().to_string(), seg_id); + } + return segment_path; +} + +int64_t Tablet::get_segment_file_size(const RowsetMetaSharedPtr& rs_meta) { + const auto& fs = rs_meta->fs(); + if (!fs) { + LOG(WARNING) << "get fs failed, resource_id={}" << rs_meta->resource_id(); + } + int64_t total_segment_size = 0; + for (int64_t seg_id = 0; seg_id < rs_meta->num_segments(); seg_id++) { + std::string segment_path = get_segment_path(rs_meta, seg_id); + int64_t segment_file_size = 0; + auto st = fs->file_size(segment_path, &segment_file_size); + if (!st.ok()) { + segment_file_size = 0; + LOG(WARNING) << "table size correctness check get segment size failed! msg:" + << st.to_string() << ", segment path:" << segment_path; + } + total_segment_size += segment_file_size; + } + return total_segment_size; +} + +int64_t Tablet::get_inverted_index_file_szie(const RowsetMetaSharedPtr& rs_meta) { + const auto& fs = rs_meta->fs(); + if (!fs) { + LOG(WARNING) << "get fs failed, resource_id={}" << rs_meta->resource_id(); + } + int64_t total_inverted_index_size = 0; + + if (rs_meta->tablet_schema()->get_inverted_index_storage_format() == + InvertedIndexStorageFormatPB::V1) { + auto indices = rs_meta->tablet_schema()->indexes(); + for (auto& index : indices) { + // only get file_size for inverted index + if (index.index_type() != IndexType::INVERTED) { + continue; + } + for (int seg_id = 0; seg_id < rs_meta->num_segments(); ++seg_id) { + std::string segment_path = get_segment_path(rs_meta, seg_id); + int64_t file_size = 0; + + std::string inverted_index_file_path = + InvertedIndexDescriptor::get_index_file_path_v1( + InvertedIndexDescriptor::get_index_file_path_prefix(segment_path), + index.index_id(), index.get_index_suffix()); + auto st = fs->file_size(inverted_index_file_path, &file_size); + if (!st.ok()) { + file_size = 0; + LOG(WARNING) << " tablet id: " << get_tablet_info().tablet_id + << ", rowset id:" << rs_meta->rowset_id() + << ", table size correctness check get inverted index v1 " + "size failed! msg:" + << st.to_string() + << ", inverted index path:" << inverted_index_file_path; + } + total_inverted_index_size += file_size; + } + } + } else { + for (int seg_id = 0; seg_id < rs_meta->num_segments(); ++seg_id) { + int64_t file_size = 0; + std::string segment_path = get_segment_path(rs_meta, seg_id); + std::string inverted_index_file_path = InvertedIndexDescriptor::get_index_file_path_v2( + InvertedIndexDescriptor::get_index_file_path_prefix(segment_path)); + auto st = fs->file_size(inverted_index_file_path, &file_size); + if (!st.ok()) { + file_size = 0; + LOG(WARNING) << " tablet id: " << get_tablet_info().tablet_id + << ", rowset id:" << rs_meta->rowset_id() + << ", table size correctness check get inverted index v2 " + "size failed! msg:" + << st.to_string() + << ", inverted index path:" << inverted_index_file_path; + } + total_inverted_index_size += file_size; + } + } + return total_inverted_index_size; +} + } // namespace doris diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h index 2d7975b0fc1..e181af3d4d3 100644 --- a/be/src/olap/tablet.h +++ b/be/src/olap/tablet.h @@ -534,6 +534,10 @@ private: //////////////////////////////////////////////////////////////////////////// void _clear_cache_by_rowset(const BetaRowsetSharedPtr& rowset); + void check_table_size_correctness(); + std::string get_segment_path(const RowsetMetaSharedPtr& rs_meta, int64_t seg_id); + int64_t get_segment_file_size(const RowsetMetaSharedPtr& rs_meta); + int64_t get_inverted_index_file_szie(const RowsetMetaSharedPtr& rs_meta); public: static const int64_t K_INVALID_CUMULATIVE_POINT = -1; diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h index 394aeb17b85..d56e529e42b 100644 --- a/be/src/olap/tablet_meta.h +++ b/be/src/olap/tablet_meta.h @@ -642,7 +642,7 @@ inline size_t TabletMeta::num_rows() const { inline size_t TabletMeta::tablet_footprint() const { size_t total_size = 0; for (auto& rs : _rs_metas) { - total_size += rs->data_disk_size(); + total_size += rs->total_disk_size(); } return total_size; } @@ -651,7 +651,7 @@ inline size_t TabletMeta::tablet_local_size() const { size_t total_size = 0; for (auto& rs : _rs_metas) { if (rs->is_local()) { - total_size += rs->data_disk_size(); + total_size += rs->total_disk_size(); } } return total_size; @@ -661,7 +661,7 @@ inline size_t TabletMeta::tablet_remote_size() const { size_t total_size = 0; for (auto& rs : _rs_metas) { if (!rs->is_local()) { - total_size += rs->data_disk_size(); + total_size += rs->total_disk_size(); } } return total_size; diff --git a/be/src/olap/task/engine_checksum_task.cpp b/be/src/olap/task/engine_checksum_task.cpp index d0c4b0e45f4..05ecfc0401b 100644 --- a/be/src/olap/task/engine_checksum_task.cpp +++ b/be/src/olap/task/engine_checksum_task.cpp @@ -93,7 +93,7 @@ Status EngineChecksumTask::_compute_checksum() { } size_t input_size = 0; for (const auto& rowset : input_rowsets) { - input_size += rowset->data_disk_size(); + input_size += rowset->total_disk_size(); } auto res = reader.init(reader_params); diff --git a/be/src/olap/task/index_builder.cpp b/be/src/olap/task/index_builder.cpp index 2ecadfa53b0..09cbdeadb3f 100644 --- a/be/src/olap/task/index_builder.cpp +++ b/be/src/olap/task/index_builder.cpp @@ -207,13 +207,12 @@ Status IndexBuilder::update_inverted_index_info() { InvertedIndexStorageFormatPB::V1) { if (_is_drop_op) { VLOG_DEBUG << "data_disk_size:" << input_rowset_meta->data_disk_size() - << " total_disk_size:" << input_rowset_meta->data_disk_size() + << " total_disk_size:" << input_rowset_meta->total_disk_size() << " index_disk_size:" << input_rowset_meta->index_disk_size() << " drop_index_size:" << drop_index_size; rowset_meta->set_total_disk_size(input_rowset_meta->total_disk_size() - drop_index_size); - rowset_meta->set_data_disk_size(input_rowset_meta->data_disk_size() - - drop_index_size); + rowset_meta->set_data_disk_size(input_rowset_meta->data_disk_size()); rowset_meta->set_index_disk_size(input_rowset_meta->index_disk_size() - drop_index_size); } else { @@ -238,7 +237,7 @@ Status IndexBuilder::update_inverted_index_info() { } rowset_meta->set_total_disk_size(input_rowset_meta->total_disk_size() - total_index_size); - rowset_meta->set_data_disk_size(input_rowset_meta->data_disk_size() - total_index_size); + rowset_meta->set_data_disk_size(input_rowset_meta->data_disk_size()); rowset_meta->set_index_disk_size(input_rowset_meta->index_disk_size() - total_index_size); } @@ -323,8 +322,7 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta inverted_index_size += inverted_index_writer->get_index_file_total_size(); } _inverted_index_file_writers.clear(); - output_rowset_meta->set_data_disk_size(output_rowset_meta->data_disk_size() + - inverted_index_size); + output_rowset_meta->set_data_disk_size(output_rowset_meta->data_disk_size()); output_rowset_meta->set_total_disk_size(output_rowset_meta->total_disk_size() + inverted_index_size); output_rowset_meta->set_index_disk_size(output_rowset_meta->index_disk_size() + @@ -489,8 +487,7 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta } _inverted_index_builders.clear(); _inverted_index_file_writers.clear(); - output_rowset_meta->set_data_disk_size(output_rowset_meta->data_disk_size() + - inverted_index_size); + output_rowset_meta->set_data_disk_size(output_rowset_meta->data_disk_size()); output_rowset_meta->set_total_disk_size(output_rowset_meta->total_disk_size() + inverted_index_size); output_rowset_meta->set_index_disk_size(output_rowset_meta->index_disk_size() + diff --git a/be/src/runtime/load_stream_writer.cpp b/be/src/runtime/load_stream_writer.cpp index 37243fab14b..2e987edc7bd 100644 --- a/be/src/runtime/load_stream_writer.cpp +++ b/be/src/runtime/load_stream_writer.cpp @@ -201,7 +201,7 @@ Status LoadStreamWriter::add_segment(uint32_t segid, const SegmentStatistics& st } DBUG_EXECUTE_IF("LoadStreamWriter.add_segment.size_not_match", { segment_file_size++; }); - if (segment_file_size + inverted_file_size != stat.data_size) { + if (segment_file_size != stat.data_size) { return Status::Corruption( "add_segment failed, segment stat {} does not match, file size={}, inverted file " "size={}, stat.data_size={}, tablet id={}", diff --git a/cloud/src/common/config.h b/cloud/src/common/config.h index 7caba826520..daeb5ddfee5 100644 --- a/cloud/src/common/config.h +++ b/cloud/src/common/config.h @@ -217,5 +217,4 @@ CONF_Int32(max_tablet_index_num_per_batch, "1000"); // Max aborted txn num for the same label name CONF_mInt64(max_num_aborted_txn, "100"); - } // namespace doris::cloud::config diff --git a/cloud/src/meta-service/meta_service_txn.cpp b/cloud/src/meta-service/meta_service_txn.cpp index cc333c42846..32f6b56f51a 100644 --- a/cloud/src/meta-service/meta_service_txn.cpp +++ b/cloud/src/meta-service/meta_service_txn.cpp @@ -1167,7 +1167,7 @@ void commit_txn_immediately( // Accumulate affected rows auto& stats = tablet_stats[tablet_id]; - stats.data_size += i.data_disk_size(); + stats.data_size += i.total_disk_size(); stats.num_rows += i.num_rows(); ++stats.num_rowsets; stats.num_segs += i.num_segments(); diff --git a/cloud/test/meta_service_http_test.cpp b/cloud/test/meta_service_http_test.cpp index 20dee957126..e49628fcb3a 100644 --- a/cloud/test/meta_service_http_test.cpp +++ b/cloud/test/meta_service_http_test.cpp @@ -320,6 +320,8 @@ static doris::RowsetMetaCloudPB create_rowset(int64_t txn_id, int64_t tablet_id, rowset.set_num_segments(1); rowset.set_num_rows(num_rows); rowset.set_data_disk_size(num_rows * 100); + rowset.set_index_disk_size(num_rows * 10); + rowset.set_total_disk_size(num_rows * 110); rowset.mutable_tablet_schema()->set_schema_version(0); rowset.set_txn_expiration(::time(nullptr)); // Required by DCHECK return rowset; @@ -1285,7 +1287,7 @@ TEST(MetaServiceHttpTest, GetTabletStatsTest) { stats_tablet_data_size_key({mock_instance, table_id, index_id, partition_id, tablet_id}, &data_size_key); ASSERT_EQ(txn->get(data_size_key, &data_size_val), TxnErrorCode::TXN_OK); - EXPECT_EQ(*(int64_t*)data_size_val.data(), 20000); + EXPECT_EQ(*(int64_t*)data_size_val.data(), 22000); std::string num_rows_key, num_rows_val; stats_tablet_num_rows_key({mock_instance, table_id, index_id, partition_id, tablet_id}, &num_rows_key); @@ -1306,7 +1308,7 @@ TEST(MetaServiceHttpTest, GetTabletStatsTest) { get_tablet_stats(meta_service.get(), table_id, index_id, partition_id, tablet_id, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); ASSERT_EQ(res.tablet_stats_size(), 1); - EXPECT_EQ(res.tablet_stats(0).data_size(), 40000); + EXPECT_EQ(res.tablet_stats(0).data_size(), 44000); EXPECT_EQ(res.tablet_stats(0).num_rows(), 400); EXPECT_EQ(res.tablet_stats(0).num_rowsets(), 5); EXPECT_EQ(res.tablet_stats(0).num_segments(), 4); diff --git a/cloud/test/meta_service_test.cpp b/cloud/test/meta_service_test.cpp index 3baec482710..ee90e604e1c 100644 --- a/cloud/test/meta_service_test.cpp +++ b/cloud/test/meta_service_test.cpp @@ -178,6 +178,8 @@ static doris::RowsetMetaCloudPB create_rowset(int64_t txn_id, int64_t tablet_id, rowset.set_num_segments(1); rowset.set_num_rows(num_rows); rowset.set_data_disk_size(num_rows * 100); + rowset.set_index_disk_size(num_rows * 10); + rowset.set_total_disk_size(num_rows * 110); rowset.mutable_tablet_schema()->set_schema_version(0); rowset.set_txn_expiration(::time(nullptr)); // Required by DCHECK return rowset; @@ -4429,7 +4431,7 @@ TEST(MetaServiceTest, GetTabletStatsTest) { stats_tablet_data_size_key({mock_instance, table_id, index_id, partition_id, tablet_id}, &data_size_key); ASSERT_EQ(txn->get(data_size_key, &data_size_val), TxnErrorCode::TXN_OK); - EXPECT_EQ(*(int64_t*)data_size_val.data(), 20000); + EXPECT_EQ(*(int64_t*)data_size_val.data(), 22000); std::string num_rows_key, num_rows_val; stats_tablet_num_rows_key({mock_instance, table_id, index_id, partition_id, tablet_id}, &num_rows_key); @@ -4450,7 +4452,7 @@ TEST(MetaServiceTest, GetTabletStatsTest) { get_tablet_stats(meta_service.get(), table_id, index_id, partition_id, tablet_id, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); ASSERT_EQ(res.tablet_stats_size(), 1); - EXPECT_EQ(res.tablet_stats(0).data_size(), 40000); + EXPECT_EQ(res.tablet_stats(0).data_size(), 44000); EXPECT_EQ(res.tablet_stats(0).num_rows(), 400); EXPECT_EQ(res.tablet_stats(0).num_rowsets(), 5); EXPECT_EQ(res.tablet_stats(0).num_segments(), 4); diff --git a/cloud/test/schema_kv_test.cpp b/cloud/test/schema_kv_test.cpp index 69ee9aba442..07f658175c8 100644 --- a/cloud/test/schema_kv_test.cpp +++ b/cloud/test/schema_kv_test.cpp @@ -293,6 +293,8 @@ static doris::RowsetMetaCloudPB create_rowset(int64_t txn_id, int64_t tablet_id, rowset.set_num_rows(100); rowset.set_num_segments(1); rowset.set_data_disk_size(10000); + rowset.set_index_disk_size(1000); + rowset.set_total_disk_size(11000); if (version > 0) { rowset.set_start_version(version); rowset.set_end_version(version); @@ -478,7 +480,7 @@ TEST(DetachSchemaKVTest, RowsetTest) { EXPECT_EQ(get_rowset_res.stats().num_rows(), 100); EXPECT_EQ(get_rowset_res.stats().num_rowsets(), 2); EXPECT_EQ(get_rowset_res.stats().num_segments(), 1); - EXPECT_EQ(get_rowset_res.stats().data_size(), 10000); + EXPECT_EQ(get_rowset_res.stats().data_size(), 11000); } // new MS read rowsets committed by both old and new MS @@ -527,7 +529,7 @@ TEST(DetachSchemaKVTest, RowsetTest) { EXPECT_EQ(get_rowset_res->stats().num_rows(), 2500); EXPECT_EQ(get_rowset_res->stats().num_rowsets(), 26); EXPECT_EQ(get_rowset_res->stats().num_segments(), 25); - EXPECT_EQ(get_rowset_res->stats().data_size(), 250000); + EXPECT_EQ(get_rowset_res->stats().data_size(), 275000); if (schema != nullptr) { auto schema_version = get_rowset_res->rowset_meta(10).schema_version(); get_rowset_res->mutable_rowset_meta(10)->mutable_tablet_schema()->set_schema_version(3); diff --git a/regression-test/pipeline/cloud_p0/conf/be_custom.conf b/regression-test/pipeline/cloud_p0/conf/be_custom.conf index 377a02536c6..5fbf89fd75c 100644 --- a/regression-test/pipeline/cloud_p0/conf/be_custom.conf +++ b/regression-test/pipeline/cloud_p0/conf/be_custom.conf @@ -37,3 +37,4 @@ enable_new_tablet_do_compaction = true arrow_flight_sql_port = 8181 pipeline_task_leakage_detect_period_sec=1 crash_in_memory_tracker_inaccurate = true +enable_table_size_correctness_check=true diff --git a/regression-test/pipeline/cloud_p1/conf/be_custom.conf b/regression-test/pipeline/cloud_p1/conf/be_custom.conf index 4310441a0ed..0d3ae0c526d 100644 --- a/regression-test/pipeline/cloud_p1/conf/be_custom.conf +++ b/regression-test/pipeline/cloud_p1/conf/be_custom.conf @@ -33,3 +33,4 @@ arrow_flight_sql_port = 8181 pipeline_task_leakage_detect_period_sec=1 crash_in_memory_tracker_inaccurate = true enable_new_tablet_do_compaction = true +enable_table_size_correctness_check=true diff --git a/regression-test/pipeline/p0/conf/be.conf b/regression-test/pipeline/p0/conf/be.conf index 745515aed06..760f813ffeb 100644 --- a/regression-test/pipeline/p0/conf/be.conf +++ b/regression-test/pipeline/p0/conf/be.conf @@ -71,3 +71,4 @@ be_proc_monitor_interval_ms = 30000 webserver_num_workers = 128 pipeline_task_leakage_detect_period_sec=1 crash_in_memory_tracker_inaccurate = true +enable_table_size_correctness_check=true diff --git a/regression-test/pipeline/p1/conf/be.conf b/regression-test/pipeline/p1/conf/be.conf index fbb36c340f0..1512bce7ac2 100644 --- a/regression-test/pipeline/p1/conf/be.conf +++ b/regression-test/pipeline/p1/conf/be.conf @@ -63,3 +63,4 @@ enable_missing_rows_correctness_check=true enable_jvm_monitor = true pipeline_task_leakage_detect_period_sec=1 crash_in_memory_tracker_inaccurate = true +enable_table_size_correctness_check=true --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org