roeap commented on code in PR #3424:
URL: https://github.com/apache/arrow-rs/pull/3424#discussion_r1059725335
##########
object_store/src/aws/mod.rs:
##########
@@ -465,44 +466,41 @@ impl AmazonS3Builder {
/// .with_url("s3://bucket/path")
/// .build();
/// ```
- pub fn with_url(mut self, url: impl AsRef<str>) -> Self {
- let maybe_parsed = Url::parse(url.as_ref());
- match maybe_parsed {
- Ok(parsed) => match parsed.scheme() {
- "s3" | "s3a" => {
- self.bucket_name = parsed.host_str().map(|host|
host.to_owned());
- }
- "https" => {
- if let Some(host) = parsed.host_str() {
- let parts = host.splitn(4, '.').collect::<Vec<&str>>();
- if parts.len() == 4 && parts[0] == "s3" && parts[2] ==
"amazonaws"
- {
- self.bucket_name = Some(parts[1].to_string());
- }
- if parts.len() == 4
- && parts[1] == "s3"
- && parts[3] == "amazonaws.com"
- {
- self.bucket_name = Some(parts[0].to_string());
- self.region = Some(parts[2].to_string());
- self.virtual_hosted_style_request = true;
- }
- }
- }
- other => {
- self.url_parse_error = Some(Error::UnknownUrlScheme {
- scheme: other.into(),
- });
- }
+ pub fn with_url(mut self, url: impl Into<String>) -> Self {
+ self.url = Some(url.into());
+ self
+ }
+
+ /// Sets properties on this builder based on a URL
+ ///
+ /// This is a separate member function to allow fallible computation to
+ /// be deferred until [`Self::build`] which in turn allows deriving
[`Clone`]
+ fn parse_url(&mut self, url: &str) -> Result<()> {
+ let parsed = Url::parse(url).context(UnableToParseUrlSnafu { url })?;
+ let host = parsed.host_str().context(UrlNotRecognisedSnafu { url })?;
+ let bucket = match parsed.scheme() {
+ "s3" | "s3a" => match host.contains('.') {
+ false => host,
+ _ => return Err(UrlNotRecognisedSnafu { url }.build().into()),
},
- Err(err) => {
- self.url_parse_error = Some(Error::UnableToParseUrl {
- source: err,
- url: url.as_ref().into(),
- });
- }
+ "https" => match host.strip_suffix(".amazonaws.com") {
+ Some(prefix) => match prefix.strip_prefix("s3.") {
+ Some(b) => match b.contains('.') {
+ false => b, // https://s3.<bucket>.amazonaws.com
+ _ => return Err(UrlNotRecognisedSnafu { url
}.build().into()),
+ },
+ _ => match prefix.split_once('.') {
+ // https://<bucket>.s3.<region>.amazonaws.com
+ Some((b, r)) if r.starts_with("s3.") => b,
Review Comment:
Should we set `region` and `virtual_hosted_style_request` here before
returning the bucket? Otherwise I think the user would have to set these
separately for things to work.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]