gavinchou commented on code in PR #58301:
URL: https://github.com/apache/doris/pull/58301#discussion_r2564029093


##########
be/src/olap/rowset/beta_rowset.cpp:
##########
@@ -68,6 +71,86 @@ Status BetaRowset::init() {
     return Status::OK(); // no op
 }
 
+namespace {
+Status load_segment_rows_from_footer(BetaRowsetSharedPtr rowset,
+                                     std::vector<uint32_t>* segment_rows, bool 
enable_segment_cache,
+                                     OlapReaderStatistics* read_stats) {
+    SegmentCacheHandle segment_cache_handle;
+    RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(
+            rowset, &segment_cache_handle, enable_segment_cache, false, 
read_stats));
+    for (const auto& segment : segment_cache_handle.get_segments()) {
+        segment_rows->emplace_back(segment->num_rows());
+    }
+    return Status::OK();
+}
+
+Status check_segment_rows_consistency(const std::vector<uint32_t>& 
rows_from_meta,
+                                      const std::vector<uint32_t>& 
rows_from_footer,
+                                      int64_t tablet_id, const std::string& 
rowset_id) {
+    DCHECK_EQ(rows_from_footer.size(), rows_from_meta.size());
+    for (size_t i = 0; i < rows_from_footer.size(); i++) {
+        if (rows_from_footer[i] != rows_from_meta[i]) {
+            auto msg = fmt::format(
+                    "segment rows mismatch between rowset meta and segment 
footer. "
+                    "segment index: {}, meta rows: {}, footer rows: {}, 
tablet={}, rowset={}",
+                    i, rows_from_meta[i], rows_from_footer[i], tablet_id, 
rowset_id);
+            if (config::enable_segment_rows_check_core) {
+                CHECK(false) << msg;
+            }
+            return Status::InternalError(msg);
+        }
+    }
+    return Status::OK();
+}
+} // namespace
+
+Status BetaRowset::get_segment_num_rows(std::vector<uint32_t>* segment_rows,
+                                        bool enable_segment_cache,
+                                        OlapReaderStatistics* read_stats) {
+    RETURN_IF_ERROR(_load_segment_rows_once.call([this, enable_segment_cache, 
read_stats] {
+        auto segment_count = num_segments();
+
+        if (!_rowset_meta->get_segment_rows().empty()) {
+            if (_rowset_meta->get_segment_rows().size() == segment_count) {
+                // use segment rows in rowset meta if eligible
+                
TEST_SYNC_POINT("BetaRowset::get_segment_num_rows:use_segment_rows_from_meta");
+                
_segments_rows.assign(_rowset_meta->get_segment_rows().cbegin(),
+                                      _rowset_meta->get_segment_rows().cend());
+                if (config::enable_segment_rows_consistency_check) {
+                    // verify segment rows from meta match segment footer
+                    std::vector<uint32_t> rows_from_footer;
+                    auto self = 
std::dynamic_pointer_cast<BetaRowset>(shared_from_this());
+                    auto load_status = load_segment_rows_from_footer(
+                            self, &rows_from_footer, enable_segment_cache, 
read_stats);
+                    if (load_status.ok()) {
+                        return check_segment_rows_consistency(
+                                _segments_rows, rows_from_footer, 
_rowset_meta->tablet_id(),
+                                _rowset_meta->rowset_id().to_string());
+                    }
+                }
+                return Status::OK();
+            } else {

Review Comment:
   When does this happen?
   Could we fall back to reading from the segment footer if this happens?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to