This is an automated email from the ASF dual-hosted git repository.
sollhui pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 3a05f60a302 [fix](be) catch block serialization exceptions to avoid
coredump (#64852)
3a05f60a302 is described below
commit 3a05f60a302274bd3b2353c1f7edb1dcd5130700
Author: hui lai <[email protected]>
AuthorDate: Tue Jun 30 14:15:25 2026 +0800
[fix](be) catch block serialization exceptions to avoid coredump (#64852)
### What problem does this PR solve?
Block::serialize computed the serialized size of each column and then
serialized the column data without converting Doris exceptions into a
Status. When a string column exceeded the LZ4 input limit, DataTypeString
threw BUFFER_OVERFLOW from get_uncompressed_serialized_bytes; nothing
caught the exception, so the async tablet-writer send thread could
terminate unexpectedly and the BE could core dump:
```
W20260625 15:10:05.374955 27425 data_type_string.cpp:92] DataTypeString
serialized byte size exceeds LZ4 max input size, bytes=4245568000,
LZ4_MAX_INPUT_SIZE=2113929216, rows=4000, real_need_copy_num=4000
W20260625 15:10:05.377851 27425 exception.cpp:34] meet exception, error
code: -204, message: LZ4_compressBound meet invalid input size,
input_size=4245568000, LZ4_MAX_INPUT_SIZE=2113929216
0# doris::Exception::Exception(int, std::basic_string_view<char,
std::char_traits<char> > const&, bool) at
./be/build_RELEASE/../src/common/exception.cpp:0
1# doris::Exception::Exception(int, std::basic_string_view<char,
std::char_traits<char> > const&) at
/usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/basic_string.h:239
2# doris::Exception::Exception<unsigned long&, int>(int,
std::basic_string_view<char, std::char_traits<char> > const&, unsigned long&,
int&&) at
/usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/basic_string.h:239
3#
doris::DataTypeString::get_uncompressed_serialized_bytes(doris::IColumn const&,
int) const at ./be/build_RELEASE/../src/core/data_type/data_type_string.cpp:96
4#
doris::DataTypeNullable::get_uncompressed_serialized_bytes(doris::IColumn
const&, int) const at
./be/build_RELEASE/../src/core/data_type/data_type_nullable.cpp:75
5# doris::Block::serialize(int, doris::PBlock*, unsigned long*,
unsigned long*, long*, doris::segment_v2::CompressionTypePB, bool) const at
./be/build_RELEASE/../src/core/block/block.cpp:1022
6#
doris::VNodeChannel::try_send_pending_block(doris::RuntimeState*) at
./be/build_RELEASE/../src/exec/sink/writer/vtablet_writer.cpp:940
7# doris::ThreadPool::dispatch_thread() at
/usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/shared_ptr_base.h:1097
8# doris::Thread::supervise_thread(void*) at
/usr/local/ldb-toolchain-v0.26/bin/../usr/include/pthread.h:562
9# ?
10# ?
```
Catch datatype serialization exceptions in Block::serialize so callers
receive a failed Status and cancel the load normally.
---
be/src/core/block/block.cpp | 26 +++++++++++++++-----------
1 file changed, 15 insertions(+), 11 deletions(-)
diff --git a/be/src/core/block/block.cpp b/be/src/core/block/block.cpp
index d94f6b3a186..4dd6530f55d 100644
--- a/be/src/core/block/block.cpp
+++ b/be/src/core/block/block.cpp
@@ -1029,14 +1029,16 @@ Status Block::serialize(int be_exec_version, PBlock*
pblock,
// calc uncompressed size for allocation
size_t content_uncompressed_size = 0;
- for (const auto& c : *this) {
- PColumnMeta* pcm = pblock->add_column_metas();
- c.to_pb_column_meta(pcm);
- DCHECK(pcm->type() != PGenericType::UNKNOWN) << " forget to set pb
type";
- // get serialized size
- content_uncompressed_size +=
- c.type->get_uncompressed_serialized_bytes(*(c.column),
pblock->be_exec_version());
- }
+ RETURN_IF_CATCH_EXCEPTION({
+ for (const auto& c : *this) {
+ PColumnMeta* pcm = pblock->add_column_metas();
+ c.to_pb_column_meta(pcm);
+ DCHECK(pcm->type() != PGenericType::UNKNOWN) << " forget to set pb
type";
+ // get serialized size
+ content_uncompressed_size +=
c.type->get_uncompressed_serialized_bytes(
+ *(c.column), pblock->be_exec_version());
+ }
+ });
// serialize data values
// when data type is HLL, content_uncompressed_size maybe larger than real
size.
@@ -1052,9 +1054,11 @@ Status Block::serialize(int be_exec_version, PBlock*
pblock,
}
char* buf = column_values.data();
- for (const auto& c : *this) {
- buf = c.type->serialize(*(c.column), buf, pblock->be_exec_version());
- }
+ RETURN_IF_CATCH_EXCEPTION({
+ for (const auto& c : *this) {
+ buf = c.type->serialize(*(c.column), buf,
pblock->be_exec_version());
+ }
+ });
*uncompressed_bytes = content_uncompressed_size;
const size_t serialize_bytes = buf - column_values.data() +
STREAMVBYTE_PADDING;
*compressed_bytes = serialize_bytes;
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org