alamb commented on code in PR #3424:
URL: https://github.com/apache/arrow-rs/pull/3424#discussion_r1059756304
##########
object_store/src/aws/mod.rs:
##########
@@ -465,44 +466,41 @@ impl AmazonS3Builder {
/// .with_url("s3://bucket/path")
/// .build();
/// ```
- pub fn with_url(mut self, url: impl AsRef<str>) -> Self {
- let maybe_parsed = Url::parse(url.as_ref());
- match maybe_parsed {
- Ok(parsed) => match parsed.scheme() {
- "s3" | "s3a" => {
- self.bucket_name = parsed.host_str().map(|host|
host.to_owned());
- }
- "https" => {
- if let Some(host) = parsed.host_str() {
- let parts = host.splitn(4, '.').collect::<Vec<&str>>();
- if parts.len() == 4 && parts[0] == "s3" && parts[2] ==
"amazonaws"
- {
- self.bucket_name = Some(parts[1].to_string());
- }
- if parts.len() == 4
- && parts[1] == "s3"
- && parts[3] == "amazonaws.com"
- {
- self.bucket_name = Some(parts[0].to_string());
- self.region = Some(parts[2].to_string());
- self.virtual_hosted_style_request = true;
- }
- }
- }
- other => {
- self.url_parse_error = Some(Error::UnknownUrlScheme {
- scheme: other.into(),
- });
- }
+ pub fn with_url(mut self, url: impl Into<String>) -> Self {
+ self.url = Some(url.into());
+ self
+ }
+
+ /// Sets properties on this builder based on a URL
+ ///
+ /// This is a separate member function to allow fallible computation to
+ /// be deferred until [`Self::build`] which in turn allows deriving
[`Clone`]
+ fn parse_url(&mut self, url: &str) -> Result<()> {
+ let parsed = Url::parse(url).context(UnableToParseUrlSnafu { url })?;
+ let host = parsed.host_str().context(UrlNotRecognisedSnafu { url })?;
+ let bucket = match parsed.scheme() {
+ "s3" | "s3a" => match host.contains('.') {
+ false => host,
+ _ => return Err(UrlNotRecognisedSnafu { url }.build().into()),
},
- Err(err) => {
- self.url_parse_error = Some(Error::UnableToParseUrl {
- source: err,
- url: url.as_ref().into(),
- });
- }
+ "https" => match host.strip_suffix(".amazonaws.com") {
+ Some(prefix) => match prefix.strip_prefix("s3.") {
+ Some(b) => match b.contains('.') {
+ false => b, // https://s3.<bucket>.amazonaws.com
+ _ => return Err(UrlNotRecognisedSnafu { url
}.build().into()),
+ },
+ _ => match prefix.split_once('.') {
+ // https://<bucket>.s3.<region>.amazonaws.com
+ Some((b, r)) if r.starts_with("s3.") => b,
Review Comment:
I think we should cover this case with a test, so that we are sure it works
and that it isn't broken in a future refactoring.
##########
object_store/src/aws/mod.rs:
##########
@@ -1022,15 +1020,20 @@ mod tests {
#[test]
fn s3_test_urls() {
- let builder = AmazonS3Builder::new().with_url("s3://bucket/path");
+ let mut builder = AmazonS3Builder::new();
+ builder.parse_url("s3://bucket/path").unwrap();
assert_eq!(builder.bucket_name, Some("bucket".to_string()));
- let builder =
AmazonS3Builder::new().with_url("https://s3.bucket.amazonaws.com");
+ let mut builder = AmazonS3Builder::new();
+ builder
+ .parse_url("https://s3.bucket.amazonaws.com")
+ .unwrap();
assert_eq!(builder.bucket_name, Some("bucket".to_string()));
- let builder =
-
AmazonS3Builder::new().with_url("https://bucket.s3.region.amazonaws.com");
+ let mut builder = AmazonS3Builder::new();
Review Comment:
Perhaps we should add a test for parsing errors?
(This also applies to the other `test_urls` functions.)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]