This is an automated email from the ASF dual-hosted git repository.
HappenLee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 8b3912260d4 [Fix](arrow flight) Fix arrow::Status inline static empty
msg core (#63191)
8b3912260d4 is described below
commit 8b3912260d4cbbf6815b9dce944aa12d47d57810
Author: linrrarity <[email protected]>
AuthorDate: Thu May 14 11:24:06 2026 +0800
[Fix](arrow flight) Fix arrow::Status inline static empty msg core (#63191)
Related PR: #62489
Problem Summary:
```cpp
static const std::string no_message = "";
return ok() ? no_message : state_->msg;
```
In the clang-built Doris BE binary, this function-local static `std::string`
(defined inside an inline header function) is emitted as a weak/COMDAT object
and placed in `.data.rel.ro`.
```bash
$ readelf -sW "$BIN" | c++filt | grep 'Status::message.*no_message'
105229: 000000007d6958e0 32 OBJECT WEAK DEFAULT 28
arrow::Status::message[abi:cxx11]() const::no_message[abi:cxx11]
$ readelf -SW doris/output/be/lib/doris_be | grep '\[ *28\]'
[28] .data.rel.ro PROGBITS 000000007d0f7720 7d0f6720 7e4208 00
WA 0 0 32
```
After relocation, RELRO makes this section read-only. However, C++
function-local statics are lazily initialized on first execution, so the
first call to `Status::message()` tries to construct `no_message` at
runtime. The `std::string` constructor writes to the object storage in
`.data.rel.ro`, which triggers `SIGSEGV invalid permissions for mapped
object`, as shown in the two stack traces below:
*FromStatus(arrow::Status)*
```text
*** Query id: 0-0 ***
*** is nereids: 0 ***
*** tablet id: 0 ***
*** Aborted at 1778559900 (unix time) try "date -d @1778559900" if you are
using GNU date ***
*** Current BE git commitID: f02e9e680c8 ***
*** SIGSEGV invalid permissions for mapped object (@0xaaaaf1ecbf28)
received by PID 3634450 (TID 3636517 OR 0xfffa35fe97c0) from PID
18446744073473408808; stack trace: ***
0# doris::signal::(anonymous namespace)::FailureSignalHandler(int,
siginfo_t*, void*) at /root/selectdb-core/be/src/common/signal_handler.h:421
1# PosixSignals::chained_handler(int, siginfo_t*, void*) [clone .part.0]
in /opt/jdk/lib/server/libjvm.so
2# JVM_handle_linux_signal in /opt/jdk/lib/server/libjvm.so
3# 0x0000FFFFAE091830 in linux-vdso.so.1
4# std::__cxx11::basic_string<char, std::char_traits<char>,
std::allocator<char> >::basic_string<std::allocator<char> >(char const*,
std::allocator<char> const&) at
/root/toolchain/ldb-toolchain-v0.21/bin/../lib/gcc/aarch64-linux-gnu/13/../../../../include/c++/13/bits/basic_string.h:632
5# arrow::flight::internal::TransportStatus::FromStatus(arrow::Status
const&) in /opt/selectdb/4.1.3.2026042721/be/lib/doris_be
6# arrow::flight::transport::grpc::ToGrpcStatus(arrow::Status const&,
grpc::ServerContext*) in /opt/selectdb/4.1.3.2026042721/be/lib/doris_be
7# arrow::flight::transport::grpc::(anonymous
namespace)::GrpcServiceHandler::DoGet(grpc::ServerContext*,
arrow::flight::protocol::Ticket const*,
grpc::ServerWriter<arrow::flight::protocol::FlightData>*) in
/opt/selectdb/4.1.3.2026042721/be/lib/doris_be
8# grpc::Status
grpc::internal::CatchingFunctionHandler<grpc::internal::ServerStreamingHandler<arrow::flight::protocol::FlightService::Service,
arrow::flight::protocol::Ticket,
arrow::flight::protocol::FlightData>::RunHandler(grpc::internal::MethodHandler::HandlerParameter
const&)::{lambda()#1}>(grpc::internal::ServerStreamingHandler<arrow::flight::protocol::FlightService::Service,
arrow::flight::protocol::Ticket,
arrow::flight::protocol::FlightData>::RunHandler(grpc::internal::Metho [...]
9#
grpc::internal::ServerStreamingHandler<arrow::flight::protocol::FlightService::Service,
arrow::flight::protocol::Ticket,
arrow::flight::protocol::FlightData>::RunHandler(grpc::internal::MethodHandler::HandlerParameter
const&) in /opt/selectdb/4.1.3.2026042721/be/lib/doris_be
10# grpc::Server::SyncRequest::ContinueRunAfterInterception() in
/opt/selectdb/4.1.3.2026042721/be/lib/doris_be
11#
grpc::Server::SyncRequest::Run(std::shared_ptr<grpc::Server::GlobalCallbacks>
const&, bool) in /opt/selectdb/4.1.3.2026042721/be/lib/doris_be
12# grpc::ThreadManager::MainWorkLoop() in
/opt/selectdb/4.1.3.2026042721/be/lib/doris_be
13#
grpc::ThreadManager::WorkerThread::WorkerThread(grpc::ThreadManager*)::$_0::__invoke(void*)
in /opt/selectdb/4.1.3.2026042721/be/lib/doris_be
14# grpc_core::(anonymous
namespace)::ThreadInternalsPosix::ThreadInternalsPosix(char const*, void
(*)(void*), void*, bool*, grpc_core::Thread::Options
const&)::{lambda(void*)#1}::__invoke(void*) in
/opt/selectdb/4.1.3.2026042721/be/lib/doris_be
15# start_thread in /lib64/libc.so.6
16# thread_start in /lib64/libc.so.6
```
*ToStatus()*
```text
*** SIGSEGV invalid permissions for mapped object (@0x56448ff1ef38)
received by PID 66637 (TID 67372 OR 0x7f2687017640) from PID
18446744071829581624; stack trace: ***
0# doris::signal::(anonymous namespace)::FailureSignalHandler(int,
siginfo_t*, void*) at ../src/common/signal_handler.h:417
1# PosixSignals::chained_handler(int, siginfo*, void*) [clone .part.0] in
/usr/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so
2# JVM_handle_linux_signal in
/usr/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so
3# 0x00007F2836CE7520 in /lib/x86_64-linux-gnu/libc.so.6
4# std::__cxx11::basic_string<char, std::char_traits<char>,
std::allocator<char> >::basic_string<std::allocator<char> >(char const*,
std::allocator<char> const&) at
/usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/basic_string.h:707
5# arrow::Status::WithDetail(std::shared_ptr<arrow::StatusDetail>) const
in /mnt/disk1/PERFORMANCE_ENV/be/lib/doris_be
6# arrow::flight::internal::TransportStatus::ToStatus() const in
/mnt/disk1/PERFORMANCE_ENV/be/lib/doris_be
7# arrow::flight::transport::grpc::FromGrpcStatus(grpc::Status const&,
grpc::ClientContext*) in /mnt/disk1/PERFORMANCE_ENV/be/lib/doris_be
8# arrow::flight::transport::grpc::(anonymous
namespace)::GrpcClientInterceptorAdapter::Intercept(grpc::experimental::InterceptorBatchMethods*)
in /mnt/disk1/PERFORMANCE_ENV/be/lib/doris_be
9# grpc::internal::InterceptorBatchMethodsImpl::RunInterceptors() in
/mnt/disk1/PERFORMANCE_ENV/be/lib/doris_be
10# grpc::internal::CallOpSet<grpc::internal::CallOpRecvInitialMetadata,
grpc::internal::CallOpClientRecvStatus, grpc::internal::CallNoOp<3>,
grpc::internal::CallNoOp<4>, grpc::internal::CallNoOp<5>,
grpc::internal::CallNoOp<6> >::FinalizeResult(void**, bool*) in
/mnt/disk1/PERFORMANCE_ENV/be/lib/doris_be
11# grpc::ClientReaderWriter<arrow::flight::protocol::FlightData,
arrow::flight::protocol::FlightData>::Finish() in
/mnt/disk1/PERFORMANCE_ENV/be/lib/doris_be
12# arrow::flight::transport::grpc::(anonymous
namespace)::FinishableDataStream<grpc::ClientReaderWriter<arrow::flight::protocol::FlightData,
arrow::flight::protocol::FlightData>,
arrow::flight::internal::FlightData>::DoFinish() in
/mnt/disk1/PERFORMANCE_ENV/be/lib/doris_be
13# arrow::flight::transport::grpc::(anonymous
namespace)::WritableDataStream<grpc::ClientReaderWriter<arrow::flight::protocol::FlightData,
arrow::flight::protocol::FlightData>,
arrow::flight::internal::FlightData>::DoFinish() in
/mnt/disk1/PERFORMANCE_ENV/be/lib/doris_be
14# arrow::flight::internal::ClientDataStream::Finish(arrow::Status) in
/mnt/disk1/PERFORMANCE_ENV/be/lib/doris_be
15# arrow::flight::ClientStreamReader::Next() in
/mnt/disk1/PERFORMANCE_ENV/be/lib/doris_be
16# doris::PythonClient::read_batch(std::shared_ptr<arrow::RecordBatch>*)
at ./be/build_RELEASE/../src/udf/python/python_client.cpp:136
17# doris::PythonUDFClient::evaluate(arrow::RecordBatch const&,
std::shared_ptr<arrow::RecordBatch>*) at
./be/build_RELEASE/../src/udf/python/python_udf_client.cpp:36
```
### Fix
Move the empty message/detail sentinels out of the header inline path into
out-of-line `Status::NoMessage()` / `Status::NoDetail()` helpers defined in
`status.cc`, and make non-OK statuses return `state_->msg` /
`state_->detail` directly. This avoids touching the empty sentinels on error
paths and prevents the inline weak/COMDAT `std::string` object from being
lazily constructed in a read-only mapping.
---
thirdparty/download-thirdparty.sh | 10 +-
...he-arrow-17.0.0-flight-safe-finish-status.patch | 216 ---------------------
...che-arrow-17.0.0-status-inline-static-fix.patch | 58 ++++++
3 files changed, 64 insertions(+), 220 deletions(-)
diff --git a/thirdparty/download-thirdparty.sh
b/thirdparty/download-thirdparty.sh
index 55e7195b6d5..ab2849e373c 100755
--- a/thirdparty/download-thirdparty.sh
+++ b/thirdparty/download-thirdparty.sh
@@ -449,10 +449,12 @@ if [[ " ${TP_ARCHIVES[*]} " =~ " ARROW " ]]; then
# Introducing the parameter that forces writing int96 timestampes
for compatibility with Paimon cpp.
patch -p1
<"${TP_PATCH_DIR}/apache-arrow-17.0.0-force-write-int96-timestamps.patch"
- # apache-arrow-17.0.0-flight-safe-finish-status.patch :
- # Avoid Flight client crashes during stream teardown by skipping
rich
- # status reconstruction on gRPC finish errors.
- patch -p1
<"${TP_PATCH_DIR}/apache-arrow-17.0.0-flight-safe-finish-status.patch"
+ # apache-arrow-17.0.0-status-inline-static-fix.patch :
+ # Move Status::message()/detail() empty sentinels out of header
+ # inline function-local statics. Clang can place those weak inline
+ # std::string objects in RELRO, then crash while initializing them.
+ patch -p1
<"${TP_PATCH_DIR}/apache-arrow-17.0.0-status-inline-static-fix.patch"
+
touch "${PATCHED_MARK}"
fi
cd -
diff --git
a/thirdparty/patches/apache-arrow-17.0.0-flight-safe-finish-status.patch
b/thirdparty/patches/apache-arrow-17.0.0-flight-safe-finish-status.patch
deleted file mode 100644
index c2be98d5bb5..00000000000
--- a/thirdparty/patches/apache-arrow-17.0.0-flight-safe-finish-status.patch
+++ /dev/null
@@ -1,216 +0,0 @@
----
arrow-apache-arrow-17.0.0-before-flight-finish-fix/cpp/src/arrow/flight/transport/grpc/grpc_client.cc
-+++
arrow-apache-arrow-17.0.0/cpp/src/arrow/flight/transport/grpc/grpc_client.cc
-@@ -71,108 +71,146 @@
- struct ClientRpc {
- ::grpc::ClientContext context;
-
- explicit ClientRpc(const FlightCallOptions& options) {
- if (options.timeout.count() >= 0) {
- std::chrono::system_clock::time_point deadline =
-
std::chrono::time_point_cast<std::chrono::system_clock::time_point::duration>(
- std::chrono::system_clock::now() + options.timeout);
- context.set_deadline(deadline);
- }
- for (auto header : options.headers) {
- context.AddMetadata(header.first, header.second);
- }
- }
-
- /// \brief Add an auth token via an auth handler
- Status SetToken(ClientAuthHandler* auth_handler) {
- if (auth_handler) {
- std::string token;
- RETURN_NOT_OK(auth_handler->GetToken(&token));
- context.AddMetadata(kGrpcAuthHeader, token);
- }
- return Status::OK();
- }
- };
-
- class GrpcAddClientHeaders : public AddCallHeaders {
- public:
- explicit GrpcAddClientHeaders(std::multimap<::grpc::string,
::grpc::string>* metadata)
- : metadata_(metadata) {}
- ~GrpcAddClientHeaders() override = default;
-
- void AddHeader(const std::string& key, const std::string& value) override {
- metadata_->insert(std::make_pair(key, value));
- }
-
- private:
- std::multimap<::grpc::string, ::grpc::string>* metadata_;
- };
-
-+// Terminal RPC status is observed while gRPC is already completing the call.
-+// Keep conversion minimal here instead of rebuilding rich Flight details.
-+Status MakeSafeTerminalStatus(const ::grpc::Status& grpc_status) {
-+ if (grpc_status.ok()) {
-+ return Status::OK();
-+ }
-+
-+ const auto error_code = grpc_status.error_code();
-+ const std::string error_message = grpc_status.error_message();
-+ switch (error_code) {
-+ case ::grpc::StatusCode::CANCELLED:
-+ return Status::Cancelled("Flight stream finish failed with message: ",
-+ error_message);
-+ case ::grpc::StatusCode::UNKNOWN:
-+ return Status::UnknownError("Flight stream finish failed with message:
",
-+ error_message);
-+ case ::grpc::StatusCode::INVALID_ARGUMENT:
-+ case ::grpc::StatusCode::FAILED_PRECONDITION:
-+ case ::grpc::StatusCode::OUT_OF_RANGE:
-+ case ::grpc::StatusCode::RESOURCE_EXHAUSTED:
-+ return Status::Invalid("Flight stream finish failed with message: ",
-+ error_message);
-+ case ::grpc::StatusCode::NOT_FOUND:
-+ return Status::KeyError("Flight stream finish failed with message: ",
-+ error_message);
-+ case ::grpc::StatusCode::ALREADY_EXISTS:
-+ return Status::AlreadyExists("Flight stream finish failed with message:
",
-+ error_message);
-+ case ::grpc::StatusCode::UNIMPLEMENTED:
-+ return Status::NotImplemented("Flight stream finish failed with
message: ",
-+ error_message);
-+ default:
-+ return Status::IOError("Flight stream finish failed with gRPC code ",
-+ static_cast<int>(error_code), ", message: ",
-+ error_message);
-+ }
-+}
-+
- class GrpcClientInterceptorAdapter : public ::grpc::experimental::Interceptor
{
- public:
- explicit GrpcClientInterceptorAdapter(
- std::vector<std::unique_ptr<ClientMiddleware>> middleware)
- : middleware_(std::move(middleware)) {}
-
- void Intercept(::grpc::experimental::InterceptorBatchMethods* methods)
override {
- using InterceptionHookPoints =
::grpc::experimental::InterceptionHookPoints;
- if (methods->QueryInterceptionHookPoint(
- InterceptionHookPoints::PRE_SEND_INITIAL_METADATA)) {
- GrpcAddClientHeaders add_headers(methods->GetSendInitialMetadata());
- for (const auto& middleware : middleware_) {
- middleware->SendingHeaders(&add_headers);
- }
- }
-
- if (methods->QueryInterceptionHookPoint(
- InterceptionHookPoints::POST_RECV_INITIAL_METADATA)) {
- if (!methods->GetRecvInitialMetadata()->empty()) {
- ReceivedHeaders(*methods->GetRecvInitialMetadata());
- }
- }
-
- if
(methods->QueryInterceptionHookPoint(InterceptionHookPoints::POST_RECV_STATUS))
{
- DCHECK_NE(nullptr, methods->GetRecvStatus());
- DCHECK_NE(nullptr, methods->GetRecvTrailingMetadata());
- ReceivedHeaders(*methods->GetRecvTrailingMetadata());
-- const Status status = FromGrpcStatus(*methods->GetRecvStatus());
-+ const Status status = MakeSafeTerminalStatus(*methods->GetRecvStatus());
- for (const auto& middleware : middleware_) {
- middleware->CallCompleted(status);
- }
- }
-
- methods->Proceed();
- }
-
- private:
- void ReceivedHeaders(
- const std::multimap<::grpc::string_ref, ::grpc::string_ref>& metadata) {
- CallHeaders headers;
- for (const auto& entry : metadata) {
- headers.insert({std::string_view(entry.first.data(),
entry.first.length()),
- std::string_view(entry.second.data(),
entry.second.length())});
- }
- for (const auto& middleware : middleware_) {
- middleware->ReceivedHeaders(headers);
- }
- }
-
- std::vector<std::unique_ptr<ClientMiddleware>> middleware_;
- };
-
- class GrpcClientInterceptorAdapterFactory
- : public ::grpc::experimental::ClientInterceptorFactoryInterface {
- public:
- explicit GrpcClientInterceptorAdapterFactory(
- std::vector<std::shared_ptr<ClientMiddlewareFactory>> middleware)
- : middleware_(std::move(middleware)) {}
-
- ::grpc::experimental::Interceptor* CreateClientInterceptor(
- ::grpc::experimental::ClientRpcInfo* info) override {
- std::vector<std::unique_ptr<ClientMiddleware>> middleware;
-
- FlightMethod flight_method = FlightMethod::Invalid;
- std::string_view method(info->method());
- if (EndsWith(method, "/Handshake")) {
- flight_method = FlightMethod::Handshake;
- } else if (EndsWith(method, "/ListFlights")) {
-@@ -283,20 +321,10 @@
- // Drain the read side to avoid gRPC hanging in Finish()
- }
-
-- server_status_ = FromGrpcStatus(stream_->Finish(), &rpc_->context);
-- if (!server_status_.ok()) {
-- server_status_ = Status::FromDetailAndArgs(
-- server_status_.code(), server_status_.detail(),
server_status_.message(),
-- ". gRPC client debug context: ",
rpc_->context.debug_error_string());
-- }
-+ server_status_ = MakeSafeTerminalStatus(stream_->Finish());
- if (!transport_status_.ok()) {
- if (server_status_.ok()) {
- server_status_ = transport_status_;
-- } else {
-- server_status_ = Status::FromDetailAndArgs(
-- server_status_.code(), server_status_.detail(),
server_status_.message(),
-- ". gRPC client debug context: ",
rpc_->context.debug_error_string(),
-- ". Additional context: ", transport_status_.ToString());
- }
- }
- finished_ = true;
-@@ -349,10 +377,8 @@
- done_writing_ = true;
-
- Status st = Base::DoFinish();
-- if (!finished_writes) {
-- return Status::FromDetailAndArgs(
-- st.code(), st.detail(), st.message(),
-- ". Additionally, could not finish writing record batches before
closing");
-+ if (!finished_writes && st.ok()) {
-+ return Status::IOError("Could not finish writing record batches before
closing");
- }
- return st;
- }
-@@ -502,7 +528,7 @@
- ~GrpcResultStream() override {
- if (stream_) {
- rpc_.context.TryCancel();
-- auto status = FromGrpcStatus(stream_->Finish(), &rpc_.context);
-+ auto status = MakeSafeTerminalStatus(stream_->Finish());
- if (!status.ok() && !status.IsCancelled()) {
- ARROW_LOG(DEBUG)
- << "DoAction result was not fully consumed, server returned
error: "
-@@ -542,7 +568,7 @@
- }
- RETURN_NOT_OK(stop_token_.Poll());
-
-- status_ = FromGrpcStatus(stream_->Finish(), &rpc_.context);
-+ status_ = MakeSafeTerminalStatus(stream_->Finish());
- stream_.reset();
- }
- RETURN_NOT_OK(status_);
----
arrow-apache-arrow-17.0.0-before-flight-finish-fix/cpp/src/arrow/flight/transport.cc
-+++ arrow-apache-arrow-17.0.0/cpp/src/arrow/flight/transport.cc
-@@ -47,9 +47,7 @@
- auto server_status = DoFinish();
- if (server_status.ok()) return st;
-
-- return Status::FromDetailAndArgs(server_status.code(),
server_status.detail(),
-- server_status.message(),
-- ". Client context: ", st.ToString());
-+ return server_status;
- }
-
- Status ClientTransport::Authenticate(const FlightCallOptions& options,
diff --git
a/thirdparty/patches/apache-arrow-17.0.0-status-inline-static-fix.patch
b/thirdparty/patches/apache-arrow-17.0.0-status-inline-static-fix.patch
new file mode 100644
index 00000000000..2a1ed534077
--- /dev/null
+++ b/thirdparty/patches/apache-arrow-17.0.0-status-inline-static-fix.patch
@@ -0,0 +1,58 @@
+diff --git a/cpp/src/arrow/status.cc b/cpp/src/arrow/status.cc
+index a9581cadc9..1b7ee7df62 100644
+--- a/cpp/src/arrow/status.cc
++++ b/cpp/src/arrow/status.cc
+@@ -17,6 +17,17 @@
+
+ namespace arrow {
+
++const std::string& Status::NoMessage() {
++ static const std::string* no_message = new std::string();
++ return *no_message;
++}
++
++const std::shared_ptr<StatusDetail>& Status::NoDetail() {
++ static const std::shared_ptr<StatusDetail>* no_detail =
++ new std::shared_ptr<StatusDetail>();
++ return *no_detail;
++}
++
+ Status::Status(StatusCode code, const std::string& msg)
+ : Status::Status(code, msg, nullptr) {}
+
+diff --git a/cpp/src/arrow/status.h b/cpp/src/arrow/status.h
+index 983b61629d..a49a982922 100644
+--- a/cpp/src/arrow/status.h
++++ b/cpp/src/arrow/status.h
+@@ -330,14 +330,18 @@ class ARROW_EXPORT [[nodiscard]] Status : public
util::EqualityComparable<Status
+
+ /// \brief Return the specific error message attached to this status.
+ const std::string& message() const {
+- static const std::string no_message = "";
+- return ok() ? no_message : state_->msg;
++ if (ARROW_PREDICT_FALSE(state_ != NULLPTR)) {
++ return state_->msg;
++ }
++ return NoMessage();
+ }
+
+ /// \brief Return the status detail attached to this message.
+ const std::shared_ptr<StatusDetail>& detail() const {
+- static std::shared_ptr<StatusDetail> no_detail = NULLPTR;
+- return state_ ? state_->detail : no_detail;
++ if (ARROW_PREDICT_FALSE(state_ != NULLPTR)) {
++ return state_->detail;
++ }
++ return NoDetail();
+ }
+
+ const void* debug_state_addr() const { return state_; }
+@@ -396,6 +400,8 @@ class ARROW_EXPORT [[nodiscard]] Status : public
util::EqualityComparable<Status
+ delete state_;
+ state_ = NULLPTR;
+ }
++ static const std::string& NoMessage();
++ static const std::shared_ptr<StatusDetail>& NoDetail();
+ void CopyFrom(const Status& s);
+ inline void MoveFrom(Status& s);
+ };
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]