This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 00ffd1ccd92 branch-4.0: [improve](log) Add segment file info when bitshuffle page corruption detected #60547 (#60689)
00ffd1ccd92 is described below
commit 00ffd1ccd9209bf697dd5957bf914b45f8562bd9
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Feb 13 09:44:52 2026 +0800
branch-4.0: [improve](log) Add segment file info when bitshuffle page corruption detected #60547 (#60689)
Cherry-picked from #60547
Co-authored-by: Luwei <[email protected]>
---
be/src/olap/rowset/segment_v2/bitshuffle_page.h | 8 +++++---
be/src/olap/rowset/segment_v2/column_reader.cpp | 11 ++++++++---
be/src/olap/rowset/segment_v2/indexed_column_reader.cpp | 5 +++++
3 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_page.h b/be/src/olap/rowset/segment_v2/bitshuffle_page.h
index f518ac655d7..5a9c14b735c 100644
--- a/be/src/olap/rowset/segment_v2/bitshuffle_page.h
+++ b/be/src/olap/rowset/segment_v2/bitshuffle_page.h
@@ -282,13 +282,15 @@ inline Status parse_bit_shuffle_header(const Slice& data, size_t& num_elements,
num_elements = decode_fixed32_le((const uint8_t*)&data[0]);
compressed_size = decode_fixed32_le((const uint8_t*)&data[4]);
num_element_after_padding = decode_fixed32_le((const uint8_t*)&data[8]);
+ size_of_element = decode_fixed32_le((const uint8_t*)&data[12]);
if (num_element_after_padding != ALIGN_UP(num_elements, 8)) {
return Status::InternalError(
"num of element information corrupted,"
- " _num_element_after_padding:{}, _num_elements:{}",
- num_element_after_padding, num_elements);
+ " _num_element_after_padding:{}, _num_elements:{}, expected_padding:{},"
+ " compressed_size:{}, size_of_element:{}, data_size:{}",
+ num_element_after_padding, num_elements, ALIGN_UP(num_elements, 8), compressed_size,
+ size_of_element, data.size);
}
- size_of_element = decode_fixed32_le((const uint8_t*)&data[12]);
switch (size_of_element) {
case 1:
case 2:
diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp
index 300994062ea..49c536cd0d3 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -1981,9 +1981,14 @@ Status FileColumnIterator::_read_data_page(const OrdinalPageIndexIterator& iter)
RETURN_IF_ERROR(
_reader->read_page(_opts, iter.page(), &handle, &page_body, &footer, _compress_codec));
// parse data page
- RETURN_IF_ERROR(ParsedPage::create(std::move(handle), page_body, footer.data_page_footer(),
- _reader->encoding_info(), iter.page(), iter.page_index(),
- &_page));
+ auto st = ParsedPage::create(std::move(handle), page_body, footer.data_page_footer(),
+ _reader->encoding_info(), iter.page(), iter.page_index(), &_page);
+ if (!st.ok()) {
+ LOG(WARNING) << "failed to create ParsedPage, file=" << _opts.file_reader->path().native()
+ << ", page_offset=" << iter.page().offset << ", page_size=" << iter.page().size
+ << ", page_index=" << iter.page_index() << ", error=" << st;
+ return st;
+ }
// dictionary page is read when the first data page that uses it is read,
// this is to optimize the memory usage: when there is no query on one column, we could
diff --git a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
index 60327d36194..62325f1dbe2 100644
--- a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
@@ -165,6 +165,11 @@ Status IndexedColumnIterator::_read_data_page(const PagePointer& pp) {
opts.need_check_bitmap = false;
status = ParsedPage::create(std::move(handle), body, footer.data_page_footer(),
_reader->encoding_info(), pp, 0, &_data_page, opts);
+ if (!status.ok()) {
+ LOG(WARNING) << "failed to create ParsedPage in IndexedColumnIterator, file="
+ << _reader->_file_reader->path().native() << ", page_offset=" << pp.offset
+ << ", page_size=" << pp.size << ", error=" << status;
+ }
DCHECK(_reader->_meta.ordinal_index_meta().is_root_data_page()
? _reader->_meta.num_values() == _data_page.num_rows
: true);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]