This is an automated email from the ASF dual-hosted git repository.
mbutrovich pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new 34dc50d69 fix: Improve error handling when resolving S3 bucket region
(#2440)
34dc50d69 is described below
commit 34dc50d69cb7400ae67015953b8193bd2933304e
Author: Andy Grove <[email protected]>
AuthorDate: Mon Sep 22 13:20:04 2025 -0600
fix: Improve error handling when resolving S3 bucket region (#2440)
* Improve error handling when resolving bucket region
* address feedback
---
native/Cargo.lock | 206 +++++++++++++++++++++++++++++-
native/core/Cargo.toml | 2 +-
native/core/src/parquet/objectstore/s3.rs | 56 ++++++--
3 files changed, 250 insertions(+), 14 deletions(-)
diff --git a/native/Cargo.lock b/native/Cargo.lock
index 7f5a5fe4d..24551efb4 100644
--- a/native/Cargo.lock
+++ b/native/Cargo.lock
@@ -1232,6 +1232,16 @@ version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6"
+[[package]]
+name = "core-foundation"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f"
+dependencies = [
+ "core-foundation-sys",
+ "libc",
+]
+
[[package]]
name = "core-foundation"
version = "0.10.1"
@@ -1528,6 +1538,7 @@ dependencies = [
"prost",
"rand",
"regex",
+ "reqwest",
"simd-adler32",
"snap",
"tempfile",
@@ -2224,6 +2235,15 @@ version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
+[[package]]
+name = "encoding_rs"
+version = "0.8.35"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3"
+dependencies = [
+ "cfg-if",
+]
+
[[package]]
name = "equator"
version = "0.4.2"
@@ -2350,6 +2370,21 @@ version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
+[[package]]
+name = "foreign-types"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
+dependencies = [
+ "foreign-types-shared",
+]
+
+[[package]]
+name = "foreign-types-shared"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
+
[[package]]
name = "form_urlencoded"
version = "1.2.2"
@@ -2775,6 +2810,22 @@ dependencies = [
"webpki-roots",
]
+[[package]]
+name = "hyper-tls"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0"
+dependencies = [
+ "bytes",
+ "http-body-util",
+ "hyper",
+ "hyper-util",
+ "native-tls",
+ "tokio",
+ "tokio-native-tls",
+ "tower-service",
+]
+
[[package]]
name = "hyper-util"
version = "0.1.17"
@@ -2794,9 +2845,11 @@ dependencies = [
"percent-encoding",
"pin-project-lite",
"socket2",
+ "system-configuration",
"tokio",
"tower-service",
"tracing",
+ "windows-registry",
]
[[package]]
@@ -3337,6 +3390,12 @@ dependencies = [
"libmimalloc-sys",
]
+[[package]]
+name = "mime"
+version = "0.3.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
+
[[package]]
name = "minimal-lexical"
version = "0.2.1"
@@ -3375,6 +3434,23 @@ version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084"
+[[package]]
+name = "native-tls"
+version = "0.2.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e"
+dependencies = [
+ "libc",
+ "log",
+ "openssl",
+ "openssl-probe",
+ "openssl-sys",
+ "schannel",
+ "security-framework 2.11.1",
+ "security-framework-sys",
+ "tempfile",
+]
+
[[package]]
name = "nix"
version = "0.26.4"
@@ -3587,12 +3663,50 @@ dependencies = [
"uuid",
]
+[[package]]
+name = "openssl"
+version = "0.10.73"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8505734d46c8ab1e19a1dce3aef597ad87dcb4c37e7188231769bd6bd51cebf8"
+dependencies = [
+ "bitflags 2.9.4",
+ "cfg-if",
+ "foreign-types",
+ "libc",
+ "once_cell",
+ "openssl-macros",
+ "openssl-sys",
+]
+
+[[package]]
+name = "openssl-macros"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.106",
+]
+
[[package]]
name = "openssl-probe"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e"
+[[package]]
+name = "openssl-sys"
+version = "0.9.109"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "90096e2e47630d78b7d1c20952dc621f957103f8bc2c8359ec81290d75238571"
+dependencies = [
+ "cc",
+ "libc",
+ "pkg-config",
+ "vcpkg",
+]
+
[[package]]
name = "ordered-float"
version = "2.10.1"
@@ -4164,6 +4278,7 @@ checksum =
"d429f34c8092b2d42c7c93cec323bb4adeb7c67698f70839adec842ec10c7ceb"
dependencies = [
"base64",
"bytes",
+ "encoding_rs",
"futures-core",
"futures-util",
"h2",
@@ -4172,9 +4287,12 @@ dependencies = [
"http-body-util",
"hyper",
"hyper-rustls",
+ "hyper-tls",
"hyper-util",
"js-sys",
"log",
+ "mime",
+ "native-tls",
"percent-encoding",
"pin-project-lite",
"quinn",
@@ -4186,6 +4304,7 @@ dependencies = [
"serde_urlencoded",
"sync_wrapper",
"tokio",
+ "tokio-native-tls",
"tokio-rustls",
"tokio-util",
"tower",
@@ -4299,7 +4418,7 @@ dependencies = [
"openssl-probe",
"rustls-pki-types",
"schannel",
- "security-framework",
+ "security-framework 3.4.0",
]
[[package]]
@@ -4369,6 +4488,19 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
+[[package]]
+name = "security-framework"
+version = "2.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
+dependencies = [
+ "bitflags 2.9.4",
+ "core-foundation 0.9.4",
+ "core-foundation-sys",
+ "libc",
+ "security-framework-sys",
+]
+
[[package]]
name = "security-framework"
version = "3.4.0"
@@ -4376,7 +4508,7 @@ source =
"registry+https://github.com/rust-lang/crates.io-index"
checksum = "60b369d18893388b345804dc0007963c99b7d665ae71d275812d828c6f089640"
dependencies = [
"bitflags 2.9.4",
- "core-foundation",
+ "core-foundation 0.10.1",
"core-foundation-sys",
"libc",
"security-framework-sys",
@@ -4684,6 +4816,27 @@ dependencies = [
"syn 2.0.106",
]
+[[package]]
+name = "system-configuration"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
+dependencies = [
+ "bitflags 2.9.4",
+ "core-foundation 0.9.4",
+ "system-configuration-sys",
+]
+
+[[package]]
+name = "system-configuration-sys"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4"
+dependencies = [
+ "core-foundation-sys",
+ "libc",
+]
+
[[package]]
name = "tempfile"
version = "3.22.0"
@@ -4894,6 +5047,16 @@ dependencies = [
"syn 2.0.106",
]
+[[package]]
+name = "tokio-native-tls"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2"
+dependencies = [
+ "native-tls",
+ "tokio",
+]
+
[[package]]
name = "tokio-rustls"
version = "0.26.2"
@@ -5110,6 +5273,12 @@ version = "1.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "943ce29a8a743eb10d6082545d861b24f9d1b160b7d741e0f2cdf726bec909c5"
+[[package]]
+name = "vcpkg"
+version = "0.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
+
[[package]]
name = "version_check"
version = "0.9.5"
@@ -5331,8 +5500,8 @@ dependencies = [
"windows-implement",
"windows-interface",
"windows-link 0.2.0",
- "windows-result",
- "windows-strings",
+ "windows-result 0.4.0",
+ "windows-strings 0.5.0",
]
[[package]]
@@ -5369,6 +5538,26 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65"
+[[package]]
+name = "windows-registry"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e"
+dependencies = [
+ "windows-link 0.1.3",
+ "windows-result 0.3.4",
+ "windows-strings 0.4.2",
+]
+
+[[package]]
+name = "windows-result"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6"
+dependencies = [
+ "windows-link 0.1.3",
+]
+
[[package]]
name = "windows-result"
version = "0.4.0"
@@ -5378,6 +5567,15 @@ dependencies = [
"windows-link 0.2.0",
]
+[[package]]
+name = "windows-strings"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57"
+dependencies = [
+ "windows-link 0.1.3",
+]
+
[[package]]
name = "windows-strings"
version = "0.5.0"
diff --git a/native/core/Cargo.toml b/native/core/Cargo.toml
index b1d2b29ff..323b5d033 100644
--- a/native/core/Cargo.toml
+++ b/native/core/Cargo.toml
@@ -73,7 +73,7 @@ aws-config = { workspace = true }
aws-credential-types = { workspace = true }
parking_lot = "0.12.3"
datafusion-comet-objectstore-hdfs = { path = "../hdfs", optional = true,
default-features = false, features = ["hdfs"] }
-
+reqwest = "0.12.12"
object_store_opendal = {version = "0.54.0", optional = true}
hdfs-sys = {version = "0.3", optional = true, features = ["hdfs_3_3"]}
opendal = { version ="0.54.0", optional = true, features = ["services-hdfs"] }
diff --git a/native/core/src/parquet/objectstore/s3.rs
b/native/core/src/parquet/objectstore/s3.rs
index cc257bb82..6d8f011d0 100644
--- a/native/core/src/parquet/objectstore/s3.rs
+++ b/native/core/src/parquet/objectstore/s3.rs
@@ -19,11 +19,6 @@ use log::{debug, error};
use std::collections::HashMap;
use url::Url;
-use std::{
- sync::{Arc, RwLock},
- time::{Duration, SystemTime},
-};
-
use crate::execution::jni_api::get_runtime;
use async_trait::async_trait;
use aws_config::{
@@ -37,9 +32,14 @@ use aws_credential_types::{
Credentials,
};
use object_store::{
- aws::{resolve_bucket_region, AmazonS3Builder, AmazonS3ConfigKey,
AwsCredential},
+ aws::{AmazonS3Builder, AmazonS3ConfigKey, AwsCredential},
path::Path,
- ClientOptions, CredentialProvider, ObjectStore, ObjectStoreScheme,
+ CredentialProvider, ObjectStore, ObjectStoreScheme,
+};
+use std::error::Error;
+use std::{
+ sync::{Arc, RwLock},
+ time::{Duration, SystemTime},
};
/// Creates an S3 object store using options specified as Hadoop S3A
configurations.
@@ -92,8 +92,12 @@ pub fn create_store(
if !s3_configs.contains_key(&AmazonS3ConfigKey::Endpoint)
&& !s3_configs.contains_key(&AmazonS3ConfigKey::Region)
{
- let region =
- get_runtime().block_on(resolve_bucket_region(bucket,
&ClientOptions::new()))?;
+ let region = get_runtime()
+ .block_on(resolve_bucket_region(bucket))
+ .map_err(|e| object_store::Error::Generic {
+ store: "S3",
+ source: format!("Failed to resolve region: {e}").into(),
+ })?;
debug!("resolved region: {region:?}");
builder = builder.with_config(AmazonS3ConfigKey::Region,
region.to_string());
}
@@ -107,6 +111,40 @@ pub fn create_store(
Ok((Box::new(object_store), path))
}
+/// Get the bucket region using the [HeadBucket API]. This will fail if the
bucket does not exist.
+///
+/// [HeadBucket API]:
https://docs.aws.amazon.com/AmazonS3/latest/API/API_HeadBucket.html
+///
+/// TODO this is copied from the object store crate and has been adapted as a
workaround
+/// for https://github.com/apache/arrow-rs-object-store/issues/479
+pub async fn resolve_bucket_region(bucket: &str) -> Result<String, Box<dyn
Error>> {
+ let endpoint = format!("https://{bucket}.s3.amazonaws.com");
+ let client = reqwest::Client::new();
+
+ let response = client.head(&endpoint).send().await?;
+
+ if response.status() == reqwest::StatusCode::NOT_FOUND {
+ return Err(Box::new(object_store::Error::Generic {
+ store: "S3",
+ source: format!("Bucket not found: {bucket}").into(),
+ }));
+ }
+
+ let region = response
+ .headers()
+ .get("x-amz-bucket-region")
+ .ok_or_else(|| {
+ Box::new(object_store::Error::Generic {
+ store: "S3",
+ source: format!("Missing region for bucket: {bucket}").into(),
+ })
+ })?
+ .to_str()?
+ .to_string();
+
+ Ok(region)
+}
+
/// Extracts S3 configuration options from Hadoop S3A configurations and
returns them
/// as a HashMap of (AmazonS3ConfigKey, String) pairs that can be applied to
an AmazonS3Builder.
///
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]