This is an automated email from the ASF dual-hosted git repository.
thisisnic pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 68fb4644ea GH-50009: [R] FinalizeS3 segfaults for stale connection (#50081)
68fb4644ea is described below
commit 68fb4644eabebca683b37ac9500e742da75585df
Author: Nic Crane <[email protected]>
AuthorDate: Mon Jun 8 12:43:29 2026 +0200
GH-50009: [R] FinalizeS3 segfaults for stale connection (#50081)
### Rationale for this change
A user reported the process crashing when reading from or writing to S3.
It looks like a stale connection triggers SIGPIPE. See also #32026
### What changes are included in this PR?
Install the SIGPIPE handler when S3 is initialised so that SIGPIPE does not
kill the process.
### Are these changes tested?
No - and I'm not sure how I can really test this out.
### Are there any user-facing changes?
No
* GitHub Issue: #50009
Authored-by: Nic Crane <[email protected]>
Signed-off-by: Nic Crane <[email protected]>
---
r/src/filesystem.cpp | 31 ++++++++++++++++++++++++++++---
1 file changed, 28 insertions(+), 3 deletions(-)
diff --git a/r/src/filesystem.cpp b/r/src/filesystem.cpp
index 82cf99514d..9324a13ce0 100644
--- a/r/src/filesystem.cpp
+++ b/r/src/filesystem.cpp
@@ -256,10 +256,23 @@ std::string fs___SubTreeFileSystem__base_path(
return file_system->base_path();
}
+// Forward declaration - defined in the ARROW_R_WITH_S3 block below.
+#if defined(ARROW_R_WITH_S3)
+void EnsureS3InitializedWithSigpipeHandler();
+#endif
+
// [[arrow::export]]
cpp11::writable::list fs___FileSystemFromUri(const std::string& path) {
using cpp11::literals::operator""_nm;
+#if defined(ARROW_R_WITH_S3)
+ // Initialize S3 before FileSystemFromUri so our options (with SIGPIPE handler)
+ // take effect before the C++ library's internal EnsureS3Initialized() call.
+ if (path.substr(0, 5) == "s3://") {
+ EnsureS3InitializedWithSigpipeHandler();
+ }
+#endif
+
std::string out_path;
auto io_context = MainRThread::GetInstance().CancellableIOContext();
return cpp11::writable::list({"fs"_nm = cpp11::to_r6(ValueOrStop(
@@ -281,6 +294,20 @@ void fs___CopyFiles(const std::shared_ptr<fs::FileSystem>& source_fs,
#include <arrow/filesystem/s3fs.h>
+// Initialize S3 with the SIGPIPE handler enabled. Without it, stale connections
+// in the SDK's connection pool can trigger SIGPIPE during Aws::ShutdownAPI(),
+// which causes R's signal handler to longjmp out of the teardown and segfault
+// (GH-50009, GH-32026).
+void EnsureS3InitializedWithSigpipeHandler() {
+ fs::S3GlobalOptions options = fs::S3GlobalOptions::Defaults();
+ options.install_sigpipe_handler = true;
+ auto status = fs::InitializeS3(options);
+ // InitializeS3 returns Invalid if already initialized - that's fine; only stop when S3 is still uninitialized
+ if (!status.ok() && !fs::IsS3Initialized()) {
+ StopIfNotOk(status);
+ }
+}
+
// [[s3::export]]
std::shared_ptr<fs::S3FileSystem> fs___S3FileSystem__create(
bool anonymous = false, std::string access_key = "", std::string
secret_key = "",
@@ -291,9 +318,7 @@ std::shared_ptr<fs::S3FileSystem> fs___S3FileSystem__create(
bool allow_bucket_creation = false, bool allow_bucket_deletion = false,
bool check_directory_existence_before_creation = false, double
connect_timeout = -1,
double request_timeout = -1) {
- // We need to ensure that S3 is initialized before we start messing with the
- // options
- StopIfNotOk(fs::EnsureS3Initialized());
+ EnsureS3InitializedWithSigpipeHandler();
fs::S3Options s3_opts;
// Handle auth (anonymous, keys, default)
// (validation/internal coherence handled in R)