This is an automated email from the ASF dual-hosted git repository.
liaoxin pushed a commit to branch cs_opt_version-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/cs_opt_version-3.1 by this
push:
new 4fbb83e46ed cs_opt_version-3.1: [Opt](meta)persist segment rows in
rowse meta pb (#58301)
4fbb83e46ed is described below
commit 4fbb83e46ed332ea81316ea0ded97c22cc03b99e
Author: bobhan1 <[email protected]>
AuthorDate: Thu Nov 27 11:57:38 2025 +0800
cs_opt_version-3.1: [Opt](meta)persist segment rows in rowse meta pb
(#58301)
---
be/src/cloud/pb_convert.cpp | 4 +
be/src/common/config.cpp | 6 +
be/src/common/config.h | 6 +
be/src/olap/parallel_scanner_builder.cpp | 22 ++--
be/src/olap/rowset/beta_rowset.cpp | 88 ++++++++++++++
be/src/olap/rowset/beta_rowset.h | 6 +
be/src/olap/rowset/beta_rowset_writer.cpp | 19 ++++
be/src/olap/rowset/rowset.h | 4 +
be/src/olap/rowset/rowset_meta.cpp | 11 ++
be/src/olap/rowset/rowset_meta.h | 11 ++
be/test/olap/rowset/beta_rowset_test.cpp | 164 ++++++++++++++++++++++++++
be/test/olap/rowset/rowset_meta_test.cpp | 183 +++++++++++++++++++++++++++++-
gensrc/proto/olap_file.proto | 6 +
13 files changed, 516 insertions(+), 14 deletions(-)
diff --git a/be/src/cloud/pb_convert.cpp b/be/src/cloud/pb_convert.cpp
index 7c9ac5f35c9..7d023377346 100644
--- a/be/src/cloud/pb_convert.cpp
+++ b/be/src/cloud/pb_convert.cpp
@@ -76,6 +76,7 @@ void doris_rowset_meta_to_cloud(RowsetMetaCloudPB* out, const
RowsetMetaPB& in)
out->set_txn_expiration(in.txn_expiration());
out->set_segments_overlap_pb(in.segments_overlap_pb());
out->set_segments_key_bounds_truncated(in.segments_key_bounds_truncated());
+ out->mutable_num_segment_rows()->CopyFrom(in.num_segment_rows());
out->mutable_segments_file_size()->CopyFrom(in.segments_file_size());
out->set_index_id(in.index_id());
if (in.has_schema_version()) {
@@ -131,6 +132,7 @@ void doris_rowset_meta_to_cloud(RowsetMetaCloudPB* out,
RowsetMetaPB&& in) {
out->set_txn_expiration(in.txn_expiration());
out->set_segments_overlap_pb(in.segments_overlap_pb());
out->set_segments_key_bounds_truncated(in.segments_key_bounds_truncated());
+ out->mutable_num_segment_rows()->Swap(in.mutable_num_segment_rows());
out->mutable_segments_file_size()->Swap(in.mutable_segments_file_size());
out->set_index_id(in.index_id());
if (in.has_schema_version()) {
@@ -235,6 +237,7 @@ void cloud_rowset_meta_to_doris(RowsetMetaPB* out, const
RowsetMetaCloudPB& in,
out->set_txn_expiration(in.txn_expiration());
out->set_segments_overlap_pb(in.segments_overlap_pb());
out->set_segments_key_bounds_truncated(in.segments_key_bounds_truncated());
+ out->mutable_num_segment_rows()->CopyFrom(in.num_segment_rows());
out->mutable_segments_file_size()->CopyFrom(in.segments_file_size());
out->set_index_id(in.index_id());
if (in.has_schema_version()) {
@@ -293,6 +296,7 @@ void cloud_rowset_meta_to_doris(RowsetMetaPB* out,
RowsetMetaCloudPB&& in,
out->set_txn_expiration(in.txn_expiration());
out->set_segments_overlap_pb(in.segments_overlap_pb());
out->set_segments_key_bounds_truncated(in.segments_key_bounds_truncated());
+ out->mutable_num_segment_rows()->Swap(in.mutable_num_segment_rows());
out->mutable_segments_file_size()->Swap(in.mutable_segments_file_size());
out->set_index_id(in.index_id());
if (in.has_schema_version()) {
diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index d19f6381028..9e8f500908b 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -381,6 +381,12 @@ DEFINE_mInt32(trash_file_expire_time_sec, "0");
// modify them upon necessity
DEFINE_Int32(min_file_descriptor_number, "60000");
DEFINE_mBool(disable_segment_cache, "false");
+// Enable checking segment rows consistency between rowset meta and segment footer
+DEFINE_mBool(enable_segment_rows_consistency_check, "false");
+DEFINE_mBool(enable_segment_rows_check_core, "false");
+// ATTENTION: For test only. In test environment, there are no historical data,
+// so all rowset meta should have segment rows info.
+DEFINE_mBool(fail_when_segment_rows_not_in_rowset_meta,"false");
DEFINE_String(row_cache_mem_limit, "20%");
// Cache for storage page size
diff --git a/be/src/common/config.h b/be/src/common/config.h
index a17768fae6e..367999fdf49 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -425,6 +425,12 @@ DECLARE_mInt32(trash_file_expire_time_sec);
// modify them upon necessity
DECLARE_Int32(min_file_descriptor_number);
DECLARE_mBool(disable_segment_cache);
+// Enable checking segment rows consistency between rowset meta and segment footer
+DECLARE_mBool(enable_segment_rows_consistency_check);
+DECLARE_mBool(enable_segment_rows_check_core);
+// ATTENTION: For test only. In test environment, there are no historical data,
+// so all rowset meta should have segment rows info.
+DECLARE_mBool(fail_when_segment_rows_not_in_rowset_meta);
DECLARE_String(row_cache_mem_limit);
// Cache for storage page size
diff --git a/be/src/olap/parallel_scanner_builder.cpp b/be/src/olap/parallel_scanner_builder.cpp
index 81ef4d92e37..9dc0d46310e 100644
--- a/be/src/olap/parallel_scanner_builder.cpp
+++ b/be/src/olap/parallel_scanner_builder.cpp
@@ -168,7 +168,7 @@ Status
ParallelScannerBuilder::_build_scanners_by_rowid(std::list<VScannerSPtr>&
Status ParallelScannerBuilder::_load() {
_total_rows = 0;
size_t idx = 0;
- bthread::Mutex bmtx;
+ std::shared_ptr<bthread::Mutex> bmtx = std::make_shared<bthread::Mutex>();
std::vector<std::shared_ptr<std::promise<Status>>> proms;
proms.reserve(_tablets.size() * 50); // guest 50 rowsets per tablet
auto pool =
ExecEnv::GetInstance()->scanner_scheduler()->get_remote_scan_thread_pool();
@@ -187,19 +187,19 @@ Status ParallelScannerBuilder::_load() {
auto prom = std::make_shared<std::promise<Status>>();
proms.emplace_back(prom);
+ // although we persist the segment rows info in rowset meta, for historical rowsets,
+ // we still need to load segments to get the segment rows info. So we still fetch them concurrently.
auto st = pool->submit_scan_task(SimplifiedScanTask(
- [esc = enable_segment_cache, rowset, &bmtx, p =
std::move(prom), this] {
- SegmentCacheHandle sch;
- auto task_st =
SegmentLoader::instance()->load_segments(
- std::dynamic_pointer_cast<BetaRowset>(rowset),
&sch, esc, false,
- &_builder_stats);
+ [esc = enable_segment_cache, rowset, bmtx, p =
std::move(prom), this] {
+ std::vector<uint32_t> segment_rows;
+ auto beta_rowset =
std::dynamic_pointer_cast<BetaRowset>(rowset);
+ Status task_st =
beta_rowset->get_segment_num_rows(&segment_rows, esc,
+
&_builder_stats);
Defer defer([p, &task_st] { p->set_value(task_st); });
- if (!task_st.ok()) return;
- std::unique_lock lck(bmtx);
- for (const auto& segment : sch.get_segments()) {
-
_all_segments_rows[rowset->rowset_id()].emplace_back(
- segment->num_rows());
+ std::unique_lock lck(*bmtx);
+ for (const auto& num_rows : segment_rows) {
+
_all_segments_rows[rowset->rowset_id()].emplace_back(num_rows);
}
_total_rows += rowset->num_rows();
},
diff --git a/be/src/olap/rowset/beta_rowset.cpp b/be/src/olap/rowset/beta_rowset.cpp
index 3f7ae765c17..617fd2168d0 100644
--- a/be/src/olap/rowset/beta_rowset.cpp
+++ b/be/src/olap/rowset/beta_rowset.cpp
@@ -17,6 +17,7 @@
#include "olap/rowset/beta_rowset.h"
+#include <butil/logging.h>
#include <ctype.h>
#include <errno.h>
#include <fmt/format.h>
@@ -28,9 +29,11 @@
#include <utility>
#include "beta_rowset.h"
+#include "cloud/config.h"
#include "common/config.h"
#include "common/logging.h"
#include "common/status.h"
+#include "cpp/sync_point.h"
#include "io/fs/file_reader.h"
#include "io/fs/file_system.h"
#include "io/fs/local_file_system.h"
@@ -68,6 +71,91 @@ Status BetaRowset::init() {
return Status::OK(); // no op
}
+namespace {
+Status load_segment_rows_from_footer(BetaRowsetSharedPtr rowset,
+ std::vector<uint32_t>* segment_rows, bool
enable_segment_cache,
+ OlapReaderStatistics* read_stats) {
+ SegmentCacheHandle segment_cache_handle;
+ RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(
+ rowset, &segment_cache_handle, enable_segment_cache, false,
read_stats));
+ for (const auto& segment : segment_cache_handle.get_segments()) {
+ segment_rows->emplace_back(segment->num_rows());
+ }
+ return Status::OK();
+}
+
+Status check_segment_rows_consistency(const std::vector<uint32_t>&
rows_from_meta,
+ const std::vector<uint32_t>&
rows_from_footer,
+ int64_t tablet_id, const std::string&
rowset_id) {
+ DCHECK_EQ(rows_from_footer.size(), rows_from_meta.size());
+ for (size_t i = 0; i < rows_from_footer.size(); i++) {
+ if (rows_from_footer[i] != rows_from_meta[i]) {
+ auto msg = fmt::format(
+ "segment rows mismatch between rowset meta and segment
footer. "
+ "segment index: {}, meta rows: {}, footer rows: {},
tablet={}, rowset={}",
+ i, rows_from_meta[i], rows_from_footer[i], tablet_id,
rowset_id);
+ if (config::enable_segment_rows_check_core) {
+ CHECK(false) << msg;
+ }
+ return Status::InternalError(msg);
+ }
+ }
+ return Status::OK();
+}
+} // namespace
+
+Status BetaRowset::get_segment_num_rows(std::vector<uint32_t>* segment_rows,
+ bool enable_segment_cache,
+ OlapReaderStatistics* read_stats) {
+ RETURN_IF_ERROR(_load_segment_rows_once.call([this, enable_segment_cache,
read_stats] {
+ auto segment_count = num_segments();
+
+ if (!_rowset_meta->get_num_segment_rows().empty()) {
+ if (_rowset_meta->get_num_segment_rows().size() == segment_count) {
+ // use segment rows in rowset meta if eligible
+
TEST_SYNC_POINT("BetaRowset::get_segment_num_rows:use_segment_rows_from_meta");
+
_segments_rows.assign(_rowset_meta->get_num_segment_rows().cbegin(),
+
_rowset_meta->get_num_segment_rows().cend());
+ if (config::enable_segment_rows_consistency_check) {
+ // verify segment rows from meta match segment footer
+ std::vector<uint32_t> rows_from_footer;
+ auto self =
std::dynamic_pointer_cast<BetaRowset>(shared_from_this());
+ auto load_status = load_segment_rows_from_footer(
+ self, &rows_from_footer, enable_segment_cache,
read_stats);
+ if (load_status.ok()) {
+ return check_segment_rows_consistency(
+ _segments_rows, rows_from_footer,
_rowset_meta->tablet_id(),
+ _rowset_meta->rowset_id().to_string());
+ }
+ }
+ return Status::OK();
+ } else {
+ auto msg = fmt::format(
+ "corrupted segment rows info in rowset meta. "
+ "segment count: {}, segment rows size: {}, tablet={},
rowset={}",
+ segment_count,
_rowset_meta->get_num_segment_rows().size(),
+ _rowset_meta->tablet_id(),
_rowset_meta->rowset_id().to_string());
+ if (config::enable_segment_rows_check_core) {
+ CHECK(false) << msg;
+ }
+ LOG_EVERY_SECOND(WARNING) << msg;
+ }
+ }
+ if (config::fail_when_segment_rows_not_in_rowset_meta) {
+ CHECK(false) << "segment rows info not found in rowset meta.
tablet="
+ << _rowset_meta->tablet_id()
+ << ", rowset=" <<
_rowset_meta->rowset_id().to_string();
+ }
+ // otherwise, read it from segment footer
+
TEST_SYNC_POINT("BetaRowset::get_segment_num_rows:load_from_segment_footer");
+ auto self = std::dynamic_pointer_cast<BetaRowset>(shared_from_this());
+ return load_segment_rows_from_footer(self, &_segments_rows,
enable_segment_cache,
+ read_stats);
+ }));
+ segment_rows->assign(_segments_rows.cbegin(), _segments_rows.cend());
+ return Status::OK();
+}
+
Status BetaRowset::do_load(bool /*use_cache*/) {
// do nothing.
 // the segments in this rowset will be loaded by calling load_segments() explicitly.
diff --git a/be/src/olap/rowset/beta_rowset.h b/be/src/olap/rowset/beta_rowset.h
index 32d8f3500a8..4c8cc4d3fb4 100644
--- a/be/src/olap/rowset/beta_rowset.h
+++ b/be/src/olap/rowset/beta_rowset.h
@@ -91,6 +91,9 @@ public:
Status show_nested_index_file(rapidjson::Value* rowset_value,
rapidjson::Document::AllocatorType&
allocator);
+ Status get_segment_num_rows(std::vector<uint32_t>* segment_rows, bool
enable_segment_cache,
+ OlapReaderStatistics* read_stats);
+
protected:
BetaRowset(const TabletSchemaSPtr& schema, const RowsetMetaSharedPtr&
rowset_meta,
std::string tablet_path);
@@ -109,6 +112,9 @@ protected:
private:
friend class RowsetFactory;
friend class BetaRowsetReader;
+
+ DorisCallOnce<Status> _load_segment_rows_once;
+ std::vector<uint32_t> _segments_rows;
};
} // namespace doris
diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp
index 0eea64eb0ee..d1e05ab276f 100644
--- a/be/src/olap/rowset/beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/beta_rowset_writer.cpp
@@ -31,6 +31,7 @@
#include <utility>
// IWYU pragma: no_include <opentelemetry/common/threadlocal.h>
+#include "common/cast_set.h"
#include "common/compiler_util.h" // IWYU pragma: keep
#include "common/config.h"
#include "common/logging.h"
@@ -683,6 +684,7 @@ Status BaseBetaRowsetWriter::add_rowset(RowsetSharedPtr
rowset) {
_num_segment += static_cast<int32_t>(rowset->num_segments());
// append key_bounds to current rowset
RETURN_IF_ERROR(rowset->get_segments_key_bounds(&_segments_encoded_key_bounds));
+ rowset->get_num_segment_rows(&_segment_num_rows);
_segments_key_bounds_truncated =
rowset->rowset_meta()->is_segments_key_bounds_truncated();
// TODO update zonemap
@@ -886,6 +888,7 @@ Status BaseBetaRowsetWriter::_build_rowset_meta(RowsetMeta*
rowset_meta, bool ch
int64_t total_data_size = 0;
int64_t total_index_size = 0;
std::vector<KeyBoundsPB> segments_encoded_key_bounds;
+ std::vector<uint32_t> segment_rows;
{
std::lock_guard<std::mutex> lock(_segid_statistics_map_mutex);
for (const auto& itr : _segid_statistics_map) {
@@ -893,14 +896,23 @@ Status
BaseBetaRowsetWriter::_build_rowset_meta(RowsetMeta* rowset_meta, bool ch
total_data_size += itr.second.data_size;
total_index_size += itr.second.index_size;
segments_encoded_key_bounds.push_back(itr.second.key_bounds);
+ // segcompaction don't modify _segment_num_rows, so we need to get segment rows from _segid_statistics_map for load
+ segment_rows.push_back(cast_set<uint32_t>(itr.second.row_num));
}
}
+ if (segment_rows.empty()) {
+ // vertical compaction and linked schema change will not record segment statistics,
+ // it will record segment rows in _segment_num_rows
+ RETURN_IF_ERROR(get_segment_num_rows(&segment_rows));
+ }
+
for (auto& key_bound : _segments_encoded_key_bounds) {
segments_encoded_key_bounds.push_back(key_bound);
}
if (_segments_key_bounds_truncated.has_value()) {
rowset_meta->set_segments_key_bounds_truncated(_segments_key_bounds_truncated.value());
}
+ rowset_meta->set_num_segment_rows(segment_rows);
 // segment key bounds are empty in old version(before version 1.2.x). So we should not modify
// the overlap property when key bounds are empty.
if (!segments_encoded_key_bounds.empty() &&
@@ -918,6 +930,13 @@ Status
BaseBetaRowsetWriter::_build_rowset_meta(RowsetMeta* rowset_meta, bool ch
"is: {}, _num_seg is: {}",
segments_encoded_key_bounds_size, segment_num);
}
+ if (segment_rows.size() != segment_num) {
+ return Status::InternalError(
+ "segment_rows size should equal to _num_seg, segment_rows
size is: {}, "
+ "_num_seg is {}, tablet={}, rowset={}, txn={}",
+ segment_rows.size(), segment_num, _context.tablet_id,
+ _context.rowset_id.to_string(), _context.txn_id);
+ }
}
rowset_meta->set_num_segments(segment_num);
diff --git a/be/src/olap/rowset/rowset.h b/be/src/olap/rowset/rowset.h
index 9e4ceb870cc..6324ea49c53 100644
--- a/be/src/olap/rowset/rowset.h
+++ b/be/src/olap/rowset/rowset.h
@@ -272,6 +272,10 @@ public:
return Status::OK();
}
+ void get_num_segment_rows(std::vector<uint32_t>* num_segment_rows) {
+ _rowset_meta->get_num_segment_rows(num_segment_rows);
+ }
+
// min key of the first segment
bool first_key(std::string* min_key) {
KeyBoundsPB key_bounds;
diff --git a/be/src/olap/rowset/rowset_meta.cpp b/be/src/olap/rowset/rowset_meta.cpp
index ec77f06855d..a1ecd6643e1 100644
--- a/be/src/olap/rowset/rowset_meta.cpp
+++ b/be/src/olap/rowset/rowset_meta.cpp
@@ -26,6 +26,7 @@
#include "cloud/cloud_storage_engine.h"
#include "common/logging.h"
#include "common/status.h"
+#include "cpp/sync_point.h"
#include "google/protobuf/util/message_differencer.h"
#include "io/fs/encrypted_fs_factory.h"
#include "io/fs/file_system.h"
@@ -295,6 +296,15 @@ void RowsetMeta::merge_rowset_meta(const RowsetMeta&
other) {
set_total_disk_size(data_disk_size() + index_disk_size());
set_segments_key_bounds_truncated(is_segments_key_bounds_truncated() ||
other.is_segments_key_bounds_truncated());
+ if (_rowset_meta_pb.num_segment_rows_size() > 0 &&
other._rowset_meta_pb.num_segment_rows_size() > 0) {
+ for (auto row_count : other._rowset_meta_pb.num_segment_rows()) {
+ _rowset_meta_pb.add_num_segment_rows(row_count);
+ }
+ } else {
+ // this may happen when a partial update load commits in low version
+ // and publishes with new segments in high version
+ _rowset_meta_pb.clear_num_segment_rows();
+ }
for (auto&& key_bound : other.get_segments_key_bounds()) {
add_segment_key_bounds(key_bound);
}
@@ -313,6 +323,7 @@ void RowsetMeta::merge_rowset_meta(const RowsetMeta& other)
{
}
 // In partial update the rowset schema maybe updated when table contains variant type, so we need the newest schema to be updated
// Otherwise the schema is stale and lead to wrong data read
+
TEST_SYNC_POINT_RETURN_WITH_VOID("RowsetMeta::merge_rowset_meta:skip_schema_merge");
if (tablet_schema()->num_variant_columns() > 0) {
// merge extracted columns
TabletSchemaSPtr merged_schema;
diff --git a/be/src/olap/rowset/rowset_meta.h b/be/src/olap/rowset/rowset_meta.h
index 8d79037b87c..10a889d32ef 100644
--- a/be/src/olap/rowset/rowset_meta.h
+++ b/be/src/olap/rowset/rowset_meta.h
@@ -140,6 +140,17 @@ public:
void set_num_rows(int64_t num_rows) {
_rowset_meta_pb.set_num_rows(num_rows); }
+ void set_num_segment_rows(const std::vector<uint32_t>& num_segment_rows) {
+
_rowset_meta_pb.mutable_num_segment_rows()->Assign(num_segment_rows.cbegin(),
num_segment_rows.cend());
+ }
+
+ void get_num_segment_rows(std::vector<uint32_t>* num_segment_rows) const {
+ num_segment_rows->assign(_rowset_meta_pb.num_segment_rows().cbegin(),
+ _rowset_meta_pb.num_segment_rows().cend());
+ }
+
+ auto& get_num_segment_rows() const { return
_rowset_meta_pb.num_segment_rows(); }
+
int64_t total_disk_size() const { return
_rowset_meta_pb.total_disk_size(); }
void set_total_disk_size(int64_t total_disk_size) {
diff --git a/be/test/olap/rowset/beta_rowset_test.cpp b/be/test/olap/rowset/beta_rowset_test.cpp
index 9d2940888e7..f7f9f39e72e 100644
--- a/be/test/olap/rowset/beta_rowset_test.cpp
+++ b/be/test/olap/rowset/beta_rowset_test.cpp
@@ -40,6 +40,7 @@
#include "common/config.h"
#include "common/status.h"
+#include "cpp/sync_point.h"
#include "gen_cpp/olap_file.pb.h"
#include <gtest/gtest.h>
#include <gtest/gtest.h>
@@ -414,4 +415,167 @@ TEST_F(BetaRowsetTest, GetIndexFileNames) {
}
}
+TEST_F(BetaRowsetTest, GetSegmentNumRowsFromMeta) {
+ // Test getting segment rows from rowset meta (new version data)
+ // This test verifies that when segment_rows is present in rowset meta,
+ // it uses the cached data directly without loading segments
+ auto tablet_schema = std::make_shared<TabletSchema>();
+ create_tablet_schema(tablet_schema);
+
+ auto rowset_meta = std::make_shared<RowsetMeta>();
+ init_rs_meta(rowset_meta, 1, 1);
+ rowset_meta->set_num_segments(3);
+
+ // Set segment rows in rowset meta (simulating new version data)
+ std::vector<uint32_t> expected_segment_rows = {100, 200, 300};
+ rowset_meta->set_num_segment_rows(expected_segment_rows);
+
+ auto rowset = std::make_shared<BetaRowset>(tablet_schema, rowset_meta, "");
+
+ // Use sync point to verify code path
+ auto sp = SyncPoint::get_instance();
+ bool used_meta_path = false;
+ bool used_footer_path = false;
+
+
sp->set_call_back("BetaRowset::get_segment_num_rows:use_segment_rows_from_meta",
+ [&](auto&& args) { used_meta_path = true; });
+
+
sp->set_call_back("BetaRowset::get_segment_num_rows:load_from_segment_footer",
+ [&](auto&& args) { used_footer_path = true; });
+
+ sp->enable_processing();
+
+ std::vector<uint32_t> segment_rows;
+ Status st = rowset->get_segment_num_rows(&segment_rows, false, &_stats);
+ ASSERT_TRUE(st.ok()) << st;
+ ASSERT_EQ(segment_rows.size(), 3);
+ ASSERT_EQ(segment_rows[0], 100);
+ ASSERT_EQ(segment_rows[1], 200);
+ ASSERT_EQ(segment_rows[2], 300);
+
+ // Verify that we used the meta path and not the footer path
+ ASSERT_TRUE(used_meta_path);
+ ASSERT_FALSE(used_footer_path);
+
+ // Test calling get_segment_num_rows twice to verify cache works
+ used_meta_path = false;
+ used_footer_path = false;
+ std::vector<uint32_t> segment_rows_2;
+ st = rowset->get_segment_num_rows(&segment_rows_2, false, &_stats);
+ ASSERT_TRUE(st.ok()) << st;
+ ASSERT_EQ(segment_rows_2.size(), 3);
+ ASSERT_EQ(segment_rows_2[0], 100);
+ ASSERT_EQ(segment_rows_2[1], 200);
+ ASSERT_EQ(segment_rows_2[2], 300);
+
+ EXPECT_FALSE(used_meta_path);
+ EXPECT_FALSE(used_footer_path);
+
+ sp->clear_all_call_backs();
+ sp->disable_processing();
+ sp->clear_trace();
+}
+
+TEST_F(BetaRowsetTest, GetSegmentNumRowsEmptyMeta) {
+ // Test when rowset meta has no segment rows (old version data)
+ // In this case, it should try to load segments from segment footer
+ auto tablet_schema = std::make_shared<TabletSchema>();
+ create_tablet_schema(tablet_schema);
+
+ auto rowset_meta = std::make_shared<RowsetMeta>();
+ init_rs_meta(rowset_meta, 1, 1);
+ rowset_meta->set_num_segments(2);
+ // segment_rows is empty (simulating old version data)
+
+ auto rowset = std::make_shared<BetaRowset>(tablet_schema, rowset_meta, "");
+
+ // Use sync point to verify code path
+ auto sp = SyncPoint::get_instance();
+ bool used_meta_path = false;
+ bool used_footer_path = false;
+
+
sp->set_call_back("BetaRowset::get_segment_num_rows:use_segment_rows_from_meta",
+ [&](auto&& args) { used_meta_path = true; });
+
+
sp->set_call_back("BetaRowset::get_segment_num_rows:load_from_segment_footer",
+ [&](auto&& args) { used_footer_path = true; });
+
+ sp->enable_processing();
+
+ std::vector<uint32_t> segment_rows;
+ Status st = rowset->get_segment_num_rows(&segment_rows, false, &_stats);
+
+ // Since we don't have actual segment files, it will fail to load segments
+ // But the important thing is to verify it tried to load from footer
+ ASSERT_TRUE(used_footer_path);
+ ASSERT_FALSE(used_meta_path);
+
+ sp->clear_all_call_backs();
+ sp->disable_processing();
+ sp->clear_trace();
+}
+
+TEST_F(BetaRowsetTest, GetSegmentNumRowsCorruptedMeta) {
+ // Test when segment_rows size doesn't match segment count
+ // This simulates a corrupted rowset meta
+ auto tablet_schema = std::make_shared<TabletSchema>();
+ create_tablet_schema(tablet_schema);
+
+ auto rowset_meta = std::make_shared<RowsetMeta>();
+ init_rs_meta(rowset_meta, 1, 1);
+ rowset_meta->set_num_segments(3);
+
+ // Set segment rows with wrong size (should be 3 but only has 2)
+ std::vector<uint32_t> wrong_segment_rows = {100, 200};
+ rowset_meta->set_num_segment_rows(wrong_segment_rows);
+
+ auto rowset = std::make_shared<BetaRowset>(tablet_schema, rowset_meta, "");
+
+ // Use sync point to verify code path
+ auto sp = SyncPoint::get_instance();
+ bool used_meta_path = false;
+ bool used_footer_path = false;
+
+
sp->set_call_back("BetaRowset::get_segment_num_rows:use_segment_rows_from_meta",
+ [&](auto&& args) { used_meta_path = true; });
+
+
sp->set_call_back("BetaRowset::get_segment_num_rows:load_from_segment_footer",
+ [&](auto&& args) { used_footer_path = true; });
+
+ sp->enable_processing();
+
+ std::vector<uint32_t> segment_rows;
+ Status st = rowset->get_segment_num_rows(&segment_rows, false, &_stats);
+
+ // When segment_rows size doesn't match, it should fall back to loading from footer
+ ASSERT_FALSE(used_meta_path);
+ ASSERT_TRUE(used_footer_path);
+
+ sp->clear_all_call_backs();
+ sp->disable_processing();
+ sp->clear_trace();
+}
+
+TEST_F(BetaRowsetTest, GetNumSegmentRowsAPI) {
+ // Test the simple get_num_segment_rows API (without loading)
+ auto tablet_schema = std::make_shared<TabletSchema>();
+ create_tablet_schema(tablet_schema);
+
+ auto rowset_meta = std::make_shared<RowsetMeta>();
+ init_rs_meta(rowset_meta, 1, 1);
+ rowset_meta->set_num_segments(3);
+
+ std::vector<uint32_t> expected_segment_rows = {100, 200, 300};
+ rowset_meta->set_num_segment_rows(expected_segment_rows);
+
+ auto rowset = std::make_shared<BetaRowset>(tablet_schema, rowset_meta, "");
+
+ std::vector<uint32_t> segment_rows;
+ rowset->get_num_segment_rows(&segment_rows);
+ ASSERT_EQ(segment_rows.size(), 3);
+ ASSERT_EQ(segment_rows[0], 100);
+ ASSERT_EQ(segment_rows[1], 200);
+ ASSERT_EQ(segment_rows[2], 300);
+}
+
} // namespace doris
diff --git a/be/test/olap/rowset/rowset_meta_test.cpp b/be/test/olap/rowset/rowset_meta_test.cpp
index 86ee132ba87..c78b5803f03 100644
--- a/be/test/olap/rowset/rowset_meta_test.cpp
+++ b/be/test/olap/rowset/rowset_meta_test.cpp
@@ -19,7 +19,6 @@
#include <gmock/gmock-actions.h>
#include <gmock/gmock-matchers.h>
-#include <gtest/gtest.h>
#include <gtest/gtest-message.h>
#include <gtest/gtest-test-part.h>
#include <gtest/gtest.h>
@@ -30,8 +29,7 @@
#include <string>
#include "common/status.h"
-#include <gtest/gtest.h>
-#include <gtest/gtest.h>
+#include "cpp/sync_point.h"
#include "gtest/gtest_pred_impl.h"
#include "olap/olap_common.h"
#include "olap/olap_meta.h"
@@ -126,4 +124,183 @@ TEST_F(RowsetMetaTest, TestRowsetIdInit) {
EXPECT_EQ(id.to_string(), "72057594037927935");
}
+TEST_F(RowsetMetaTest, TestNumSegmentRowsSetAndGet) {
+ RowsetMeta rowset_meta;
+ EXPECT_TRUE(rowset_meta.init_from_json(_json_rowset_meta));
+
+ // Test set_num_segment_rows and get_num_segment_rows
+ std::vector<uint32_t> num_segment_rows = {100, 200, 300};
+ rowset_meta.set_num_segment_rows(num_segment_rows);
+
+ std::vector<uint32_t> retrieved_rows;
+ rowset_meta.get_num_segment_rows(&retrieved_rows);
+
+ EXPECT_EQ(retrieved_rows.size(), 3);
+ EXPECT_EQ(retrieved_rows[0], 100);
+ EXPECT_EQ(retrieved_rows[1], 200);
+ EXPECT_EQ(retrieved_rows[2], 300);
+
+ // Test get_num_segment_rows() const reference
+ const auto& num_segment_rows_ref = rowset_meta.get_num_segment_rows();
+ EXPECT_EQ(num_segment_rows_ref.size(), 3);
+ EXPECT_EQ(num_segment_rows_ref.Get(0), 100);
+ EXPECT_EQ(num_segment_rows_ref.Get(1), 200);
+ EXPECT_EQ(num_segment_rows_ref.Get(2), 300);
+
+ // Test serialization and deserialization
+ RowsetMetaPB rowset_meta_pb;
+ rowset_meta.to_rowset_pb(&rowset_meta_pb);
+ EXPECT_EQ(rowset_meta_pb.num_segment_rows_size(), 3);
+ EXPECT_EQ(rowset_meta_pb.num_segment_rows(0), 100);
+ EXPECT_EQ(rowset_meta_pb.num_segment_rows(1), 200);
+ EXPECT_EQ(rowset_meta_pb.num_segment_rows(2), 300);
+
+ RowsetMeta rowset_meta_2;
+ rowset_meta_2.init_from_pb(rowset_meta_pb);
+ std::vector<uint32_t> retrieved_rows_2;
+ rowset_meta_2.get_num_segment_rows(&retrieved_rows_2);
+ EXPECT_EQ(retrieved_rows_2.size(), 3);
+ EXPECT_EQ(retrieved_rows_2[0], 100);
+ EXPECT_EQ(retrieved_rows_2[1], 200);
+ EXPECT_EQ(retrieved_rows_2[2], 300);
+}
+
+TEST_F(RowsetMetaTest, TestNumSegmentRowsEmpty) {
+ RowsetMeta rowset_meta;
+ EXPECT_TRUE(rowset_meta.init_from_json(_json_rowset_meta));
+
+ // By default, num_segment_rows should be empty
+ std::vector<uint32_t> retrieved_rows;
+ rowset_meta.get_num_segment_rows(&retrieved_rows);
+ EXPECT_EQ(retrieved_rows.size(), 0);
+
+ const auto& num_segment_rows_ref = rowset_meta.get_num_segment_rows();
+ EXPECT_EQ(num_segment_rows_ref.size(), 0);
+}
+
+TEST_F(RowsetMetaTest, TestMergeRowsetMetaWithNumSegmentRows) {
+ RowsetMeta rowset_meta_1;
+ EXPECT_TRUE(rowset_meta_1.init_from_json(_json_rowset_meta));
+ std::vector<uint32_t> num_segment_rows_1 = {100, 200};
+ rowset_meta_1.set_num_segment_rows(num_segment_rows_1);
+ rowset_meta_1.set_num_segments(2);
+ rowset_meta_1.set_total_disk_size(1000);
+ rowset_meta_1.set_data_disk_size(800);
+ rowset_meta_1.set_index_disk_size(200);
+
+ RowsetMeta rowset_meta_2;
+ EXPECT_TRUE(rowset_meta_2.init_from_json(_json_rowset_meta));
+ std::vector<uint32_t> num_segment_rows_2 = {300, 400, 500};
+ rowset_meta_2.set_num_segment_rows(num_segment_rows_2);
+ rowset_meta_2.set_num_segments(3);
+ rowset_meta_2.set_total_disk_size(2000);
+ rowset_meta_2.set_data_disk_size(1600);
+ rowset_meta_2.set_index_disk_size(400);
+
+ // Use sync point to skip schema merge logic
+ auto sp = SyncPoint::get_instance();
+ bool skip_called = false;
+ sp->set_call_back("RowsetMeta::merge_rowset_meta:skip_schema_merge",
[&](auto&& args) {
+ skip_called = true;
+ // Set the return flag to skip the schema merge logic
+ auto pred = try_any_cast<bool*>(args.back());
+ *pred = true;
+ });
+ sp->enable_processing();
+
+ // Merge rowset_meta_2 into rowset_meta_1
+ rowset_meta_1.merge_rowset_meta(rowset_meta_2);
+
+ EXPECT_TRUE(skip_called);
+
+ sp->clear_all_call_backs();
+ sp->disable_processing();
+ sp->clear_trace();
+
+ // Check merged num_segment_rows
+ std::vector<uint32_t> merged_rows;
+ rowset_meta_1.get_num_segment_rows(&merged_rows);
+ EXPECT_EQ(merged_rows.size(), 5);
+ EXPECT_EQ(merged_rows[0], 100);
+ EXPECT_EQ(merged_rows[1], 200);
+ EXPECT_EQ(merged_rows[2], 300);
+ EXPECT_EQ(merged_rows[3], 400);
+ EXPECT_EQ(merged_rows[4], 500);
+
+ // Check merged num_segments
+ EXPECT_EQ(rowset_meta_1.num_segments(), 5);
+
+ // Check merged disk sizes
+ EXPECT_EQ(rowset_meta_1.total_disk_size(), 3000);
+}
+
+TEST_F(RowsetMetaTest, TestMergeRowsetMetaWithPartialNumSegmentRows) {
+ RowsetMeta rowset_meta_1;
+ EXPECT_TRUE(rowset_meta_1.init_from_json(_json_rowset_meta));
+ std::vector<uint32_t> num_segment_rows_1 = {100, 200};
+ rowset_meta_1.set_num_segment_rows(num_segment_rows_1);
+ rowset_meta_1.set_num_segments(2);
+
+ RowsetMeta rowset_meta_2;
+ EXPECT_TRUE(rowset_meta_2.init_from_json(_json_rowset_meta));
+ // rowset_meta_2 has no num_segment_rows (simulating old version data)
+ rowset_meta_2.set_num_segments(3);
+
+ // Use sync point to skip schema merge logic
+ auto sp = SyncPoint::get_instance();
+ sp->set_call_back("RowsetMeta::merge_rowset_meta:skip_schema_merge",
[&](auto&& args) {
+ auto pred = try_any_cast<bool*>(args.back());
+ *pred = true;
+ });
+ sp->enable_processing();
+
+ // Merge rowset_meta_2 into rowset_meta_1
+ rowset_meta_1.merge_rowset_meta(rowset_meta_2);
+
+ sp->clear_all_call_backs();
+ sp->disable_processing();
+ sp->clear_trace();
+
+ // num_segment_rows should be cleared when one of them is empty
+ std::vector<uint32_t> merged_rows;
+ rowset_meta_1.get_num_segment_rows(&merged_rows);
+ EXPECT_EQ(merged_rows.size(), 0);
+
+ // num_segments should still be merged
+ EXPECT_EQ(rowset_meta_1.num_segments(), 5);
+}
+
+TEST_F(RowsetMetaTest, TestMergeRowsetMetaBothEmpty) {
+ RowsetMeta rowset_meta_1;
+ EXPECT_TRUE(rowset_meta_1.init_from_json(_json_rowset_meta));
+ rowset_meta_1.set_num_segments(2);
+
+ RowsetMeta rowset_meta_2;
+ EXPECT_TRUE(rowset_meta_2.init_from_json(_json_rowset_meta));
+ rowset_meta_2.set_num_segments(3);
+
+ // Use sync point to skip schema merge logic
+ auto sp = SyncPoint::get_instance();
+ sp->set_call_back("RowsetMeta::merge_rowset_meta:skip_schema_merge",
[&](auto&& args) {
+ auto pred = try_any_cast<bool*>(args.back());
+ *pred = true;
+ });
+ sp->enable_processing();
+
+ // Merge rowset_meta_2 into rowset_meta_1
+ rowset_meta_1.merge_rowset_meta(rowset_meta_2);
+
+ sp->clear_all_call_backs();
+ sp->disable_processing();
+ sp->clear_trace();
+
+ // num_segment_rows should remain empty
+ std::vector<uint32_t> merged_rows;
+ rowset_meta_1.get_num_segment_rows(&merged_rows);
+ EXPECT_EQ(merged_rows.size(), 0);
+
+ // num_segments should still be merged
+ EXPECT_EQ(rowset_meta_1.num_segments(), 5);
+}
+
} // namespace doris
diff --git a/gensrc/proto/olap_file.proto b/gensrc/proto/olap_file.proto
index 6ace01a9257..2c54cae5260 100644
--- a/gensrc/proto/olap_file.proto
+++ b/gensrc/proto/olap_file.proto
@@ -125,6 +125,9 @@ message RowsetMetaPB {
// indicate that whether the segments key bounds is truncated
optional bool segments_key_bounds_truncated = 55;
+ // rows count for each segment
+ repeated int64 num_segment_rows = 56;
+
// For cloud
// for data recycling
optional int64 txn_expiration = 1000;
@@ -216,6 +219,9 @@ message RowsetMetaCloudPB {
// indicate that whether the segments key bounds is truncated
optional bool segments_key_bounds_truncated = 55;
+ // rows count for each segment
+ repeated int64 num_segment_rows = 108;
+
// cloud
// the field is a vector, rename it
repeated int64 segments_file_size = 100;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]