This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 00ffd1ccd92 branch-4.0: [improve](log) Add segment file info when bitshuffle page corruption detected #60547 (#60689)
00ffd1ccd92 is described below

commit 00ffd1ccd9209bf697dd5957bf914b45f8562bd9
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Feb 13 09:44:52 2026 +0800

    branch-4.0: [improve](log) Add segment file info when bitshuffle page corruption detected #60547 (#60689)
    
    Cherry-picked from #60547
    
    Co-authored-by: Luwei <[email protected]>
---
 be/src/olap/rowset/segment_v2/bitshuffle_page.h         |  8 +++++---
 be/src/olap/rowset/segment_v2/column_reader.cpp         | 11 ++++++++---
 be/src/olap/rowset/segment_v2/indexed_column_reader.cpp |  5 +++++
 3 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_page.h 
b/be/src/olap/rowset/segment_v2/bitshuffle_page.h
index f518ac655d7..5a9c14b735c 100644
--- a/be/src/olap/rowset/segment_v2/bitshuffle_page.h
+++ b/be/src/olap/rowset/segment_v2/bitshuffle_page.h
@@ -282,13 +282,15 @@ inline Status parse_bit_shuffle_header(const Slice& data, 
size_t& num_elements,
     num_elements = decode_fixed32_le((const uint8_t*)&data[0]);
     compressed_size = decode_fixed32_le((const uint8_t*)&data[4]);
     num_element_after_padding = decode_fixed32_le((const uint8_t*)&data[8]);
+    size_of_element = decode_fixed32_le((const uint8_t*)&data[12]);
     if (num_element_after_padding != ALIGN_UP(num_elements, 8)) {
         return Status::InternalError(
                 "num of element information corrupted,"
-                " _num_element_after_padding:{}, _num_elements:{}",
-                num_element_after_padding, num_elements);
+                " _num_element_after_padding:{}, _num_elements:{}, 
expected_padding:{},"
+                " compressed_size:{}, size_of_element:{}, data_size:{}",
+                num_element_after_padding, num_elements, 
ALIGN_UP(num_elements, 8), compressed_size,
+                size_of_element, data.size);
     }
-    size_of_element = decode_fixed32_le((const uint8_t*)&data[12]);
     switch (size_of_element) {
     case 1:
     case 2:
diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp 
b/be/src/olap/rowset/segment_v2/column_reader.cpp
index 300994062ea..49c536cd0d3 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -1981,9 +1981,14 @@ Status FileColumnIterator::_read_data_page(const 
OrdinalPageIndexIterator& iter)
     RETURN_IF_ERROR(
             _reader->read_page(_opts, iter.page(), &handle, &page_body, 
&footer, _compress_codec));
     // parse data page
-    RETURN_IF_ERROR(ParsedPage::create(std::move(handle), page_body, 
footer.data_page_footer(),
-                                       _reader->encoding_info(), iter.page(), 
iter.page_index(),
-                                       &_page));
+    auto st = ParsedPage::create(std::move(handle), page_body, 
footer.data_page_footer(),
+                                 _reader->encoding_info(), iter.page(), 
iter.page_index(), &_page);
+    if (!st.ok()) {
+        LOG(WARNING) << "failed to create ParsedPage, file=" << 
_opts.file_reader->path().native()
+                     << ", page_offset=" << iter.page().offset << ", 
page_size=" << iter.page().size
+                     << ", page_index=" << iter.page_index() << ", error=" << 
st;
+        return st;
+    }
 
     // dictionary page is read when the first data page that uses it is read,
     // this is to optimize the memory usage: when there is no query on one 
column, we could
diff --git a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp 
b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
index 60327d36194..62325f1dbe2 100644
--- a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
@@ -165,6 +165,11 @@ Status IndexedColumnIterator::_read_data_page(const 
PagePointer& pp) {
     opts.need_check_bitmap = false;
     status = ParsedPage::create(std::move(handle), body, 
footer.data_page_footer(),
                                 _reader->encoding_info(), pp, 0, &_data_page, 
opts);
+    if (!status.ok()) {
+        LOG(WARNING) << "failed to create ParsedPage in IndexedColumnIterator, 
file="
+                     << _reader->_file_reader->path().native() << ", 
page_offset=" << pp.offset
+                     << ", page_size=" << pp.size << ", error=" << status;
+    }
     DCHECK(_reader->_meta.ordinal_index_meta().is_root_data_page()
                    ? _reader->_meta.num_values() == _data_page.num_rows
                    : true);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to