This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 11205a891c Add AWS presigned URL support (#4876)
11205a891c is described below
commit 11205a891c637694165ce40f75e9093729d80342
Author: Carol (Nichols || Goulding)
<[email protected]>
AuthorDate: Thu Oct 12 10:27:22 2023 -0400
Add AWS presigned URL support (#4876)
* refactor: Extract AWS algorithm string into a const
* refactor: Extract a string_to_sign function and encapsulate non-reused
values
* refactor: Extract a scope function
* refactor: Move hashing of canonical request into string_to_sign
* refactor: Move canonical_request into string_to_sign
* refactor: Move canonical URI construction into string_to_sign
* refactor: Move canonical query construction into string_to_sign
* feat: Implement sign method
* feat: Publicly expose AWS S3 path_url for convenience constructing signed
URLs
* docs: Add an example of signing an upload URL
* feat: Add a more convenient API on AmazonS3 for creating signed URLs
* fix: Add credential token to the X-Amz-Security-Token query param if
specified
* fix: Change path_url to be pub crate instead of pub
* feat: Define a public Signer trait for the signing interface
* fix: Hide some doc test code
Co-authored-by: Raphael Taylor-Davies
<[email protected]>
* fix: Use Method through reqwest which re-exports http anyway
---------
Co-authored-by: Raphael Taylor-Davies
<[email protected]>
---
object_store/src/aws/client.rs | 2 +-
object_store/src/aws/credential.rs | 181 ++++++++++++++++++++++++++++++-------
object_store/src/aws/mod.rs | 64 ++++++++++++-
object_store/src/lib.rs | 2 +
object_store/src/signer.rs | 40 ++++++++
5 files changed, 255 insertions(+), 34 deletions(-)
diff --git a/object_store/src/aws/client.rs b/object_store/src/aws/client.rs
index 1c35586f8b..e3ac60eca0 100644
--- a/object_store/src/aws/client.rs
+++ b/object_store/src/aws/client.rs
@@ -212,7 +212,7 @@ pub struct S3Config {
}
impl S3Config {
- fn path_url(&self, path: &Path) -> String {
+ pub(crate) fn path_url(&self, path: &Path) -> String {
format!("{}/{}", self.bucket_endpoint, encode_path(path))
}
}
diff --git a/object_store/src/aws/credential.rs b/object_store/src/aws/credential.rs
index be0ffa578d..e27b71f7c4 100644
--- a/object_store/src/aws/credential.rs
+++ b/object_store/src/aws/credential.rs
@@ -30,7 +30,7 @@ use reqwest::{Client, Method, Request, RequestBuilder, StatusCode};
use serde::Deserialize;
use std::collections::BTreeMap;
use std::sync::Arc;
-use std::time::Instant;
+use std::time::{Duration, Instant};
use tracing::warn;
use url::Url;
@@ -89,6 +89,7 @@ const DATE_HEADER: &str = "x-amz-date";
const HASH_HEADER: &str = "x-amz-content-sha256";
const TOKEN_HEADER: &str = "x-amz-security-token";
const AUTH_HEADER: &str = "authorization";
+const ALGORITHM: &str = "AWS4-HMAC-SHA256";
impl<'a> AwsAuthorizer<'a> {
/// Create a new [`AwsAuthorizer`]
@@ -154,21 +155,110 @@ impl<'a> AwsAuthorizer<'a> {
let header_digest = HeaderValue::from_str(&digest).unwrap();
request.headers_mut().insert(HASH_HEADER, header_digest);
- // Each path segment must be URI-encoded twice (except for Amazon S3 which only gets URI-encoded once).
+ let (signed_headers, canonical_headers) =
canonicalize_headers(request.headers());
+
+ let scope = self.scope(date);
+
+ let string_to_sign = self.string_to_sign(
+ date,
+ &scope,
+ request.method(),
+ request.url(),
+ &canonical_headers,
+ &signed_headers,
+ &digest,
+ );
+
+ // sign the string
+ let signature =
+ self.credential
+ .sign(&string_to_sign, date, self.region, self.service);
+
+ // build the actual auth header
+ let authorisation = format!(
+ "{} Credential={}/{}, SignedHeaders={}, Signature={}",
+ ALGORITHM, self.credential.key_id, scope, signed_headers, signature
+ );
+
+ let authorization_val = HeaderValue::from_str(&authorisation).unwrap();
+ request.headers_mut().insert(AUTH_HEADER, authorization_val);
+ }
+
+ pub(crate) fn sign(&self, method: Method, url: &mut Url, expires_in:
Duration) {
+ let date = self.date.unwrap_or_else(Utc::now);
+ let scope = self.scope(date);
+
+ // https://docs.aws.amazon.com/AmazonS3/latest/API/sigv4-query-string-auth.html
+ url.query_pairs_mut()
+ .append_pair("X-Amz-Algorithm", ALGORITHM)
+ .append_pair(
+ "X-Amz-Credential",
+ &format!("{}/{}", self.credential.key_id, scope),
+ )
+ .append_pair("X-Amz-Date",
&date.format("%Y%m%dT%H%M%SZ").to_string())
+ .append_pair("X-Amz-Expires", &expires_in.as_secs().to_string())
+ .append_pair("X-Amz-SignedHeaders", "host");
+
+ // For S3, you must include the X-Amz-Security-Token query parameter in the URL if
+ // using credentials sourced from the STS service.
+ if let Some(ref token) = self.credential.token {
+ url.query_pairs_mut()
+ .append_pair("X-Amz-Security-Token", token);
+ }
+
+ // We don't have a payload; the user is going to send the payload directly themselves.
+ let digest = UNSIGNED_PAYLOAD;
+
+ let host =
&url[url::Position::BeforeHost..url::Position::AfterPort].to_string();
+ let mut headers = HeaderMap::new();
+ let host_val = HeaderValue::from_str(host).unwrap();
+ headers.insert("host", host_val);
+
+ let (signed_headers, canonical_headers) =
canonicalize_headers(&headers);
+
+ let string_to_sign = self.string_to_sign(
+ date,
+ &scope,
+ &method,
+ url,
+ &canonical_headers,
+ &signed_headers,
+ digest,
+ );
+
+ let signature =
+ self.credential
+ .sign(&string_to_sign, date, self.region, self.service);
+
+ url.query_pairs_mut()
+ .append_pair("X-Amz-Signature", &signature);
+ }
+
+ #[allow(clippy::too_many_arguments)]
+ fn string_to_sign(
+ &self,
+ date: DateTime<Utc>,
+ scope: &str,
+ request_method: &Method,
+ url: &Url,
+ canonical_headers: &str,
+ signed_headers: &str,
+ digest: &str,
+ ) -> String {
+ // Each path segment must be URI-encoded twice (except for Amazon S3 which only gets
+ // URI-encoded once).
// see https://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
let canonical_uri = match self.service {
- "s3" => request.url().path().to_string(),
- _ => utf8_percent_encode(request.url().path(),
&STRICT_PATH_ENCODE_SET)
- .to_string(),
+ "s3" => url.path().to_string(),
+ _ => utf8_percent_encode(url.path(),
&STRICT_PATH_ENCODE_SET).to_string(),
};
- let (signed_headers, canonical_headers) =
canonicalize_headers(request.headers());
- let canonical_query = canonicalize_query(request.url());
+ let canonical_query = canonicalize_query(url);
// https://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
let canonical_request = format!(
"{}\n{}\n{}\n{}\n{}\n{}",
- request.method().as_str(),
+ request_method.as_str(),
canonical_uri,
canonical_query,
canonical_headers,
@@ -177,33 +267,23 @@ impl<'a> AwsAuthorizer<'a> {
);
let hashed_canonical_request =
hex_digest(canonical_request.as_bytes());
- let scope = format!(
- "{}/{}/{}/aws4_request",
- date.format("%Y%m%d"),
- self.region,
- self.service
- );
- let string_to_sign = format!(
- "AWS4-HMAC-SHA256\n{}\n{}\n{}",
+ format!(
+ "{}\n{}\n{}\n{}",
+ ALGORITHM,
date.format("%Y%m%dT%H%M%SZ"),
scope,
hashed_canonical_request
- );
-
- // sign the string
- let signature =
- self.credential
- .sign(&string_to_sign, date, self.region, self.service);
-
- // build the actual auth header
- let authorisation = format!(
- "AWS4-HMAC-SHA256 Credential={}/{}, SignedHeaders={}, Signature={}",
- self.credential.key_id, scope, signed_headers, signature
- );
+ )
+ }
- let authorization_val = HeaderValue::from_str(&authorisation).unwrap();
- request.headers_mut().insert(AUTH_HEADER, authorization_val);
+ fn scope(&self, date: DateTime<Utc>) -> String {
+ format!(
+ "{}/{}/{}/aws4_request",
+ date.format("%Y%m%d"),
+ self.region,
+ self.service
+ )
}
}
@@ -667,7 +747,46 @@ mod tests {
};
authorizer.authorize(&mut request, None);
- assert_eq!(request.headers().get(AUTH_HEADER).unwrap(), "AWS4-HMAC-SHA256 Credential=AKIAIOSFODNN7EXAMPLE/20220806/us-east-1/ec2/aws4_request, SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=653c3d8ea261fd826207df58bc2bb69fbb5003e9eb3c0ef06e4a51f2a81d8699")
+ assert_eq!(request.headers().get(AUTH_HEADER).unwrap(), "AWS4-HMAC-SHA256 Credential=AKIAIOSFODNN7EXAMPLE/20220806/us-east-1/ec2/aws4_request, SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=653c3d8ea261fd826207df58bc2bb69fbb5003e9eb3c0ef06e4a51f2a81d8699");
+ }
+
+ #[test]
+ fn signed_get_url() {
+ // Values from https://docs.aws.amazon.com/AmazonS3/latest/API/sigv4-query-string-auth.html
+ let credential = AwsCredential {
+ key_id: "AKIAIOSFODNN7EXAMPLE".to_string(),
+ secret_key: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY".to_string(),
+ token: None,
+ };
+
+ let date = DateTime::parse_from_rfc3339("2013-05-24T00:00:00Z")
+ .unwrap()
+ .with_timezone(&Utc);
+
+ let authorizer = AwsAuthorizer {
+ date: Some(date),
+ credential: &credential,
+ service: "s3",
+ region: "us-east-1",
+ sign_payload: false,
+ };
+
+ let mut url =
+ Url::parse("https://examplebucket.s3.amazonaws.com/test.txt").unwrap();
+ authorizer.sign(Method::GET, &mut url, Duration::from_secs(86400));
+
+ assert_eq!(
+ url,
+ Url::parse(
+ "https://examplebucket.s3.amazonaws.com/test.txt?\
+ X-Amz-Algorithm=AWS4-HMAC-SHA256&\
+ X-Amz-Credential=AKIAIOSFODNN7EXAMPLE%2F20130524%2Fus-east-1%2Fs3%2Faws4_request&\
+ X-Amz-Date=20130524T000000Z&\
+ X-Amz-Expires=86400&\
+ X-Amz-SignedHeaders=host&\
+ X-Amz-Signature=aeeed9bbccd4d02ee5c0109b86d86835f995330da4c265957d157751f604d404"
+ ).unwrap()
+ );
}
#[test]
diff --git a/object_store/src/aws/mod.rs b/object_store/src/aws/mod.rs
index db3e1b9a4b..0028be99fa 100644
--- a/object_store/src/aws/mod.rs
+++ b/object_store/src/aws/mod.rs
@@ -36,10 +36,10 @@ use bytes::Bytes;
use futures::stream::BoxStream;
use futures::{StreamExt, TryStreamExt};
use itertools::Itertools;
+use reqwest::Method;
use serde::{Deserialize, Serialize};
use snafu::{ensure, OptionExt, ResultExt, Snafu};
-use std::str::FromStr;
-use std::sync::Arc;
+use std::{str::FromStr, sync::Arc, time::Duration};
use tokio::io::AsyncWrite;
use tracing::info;
use url::Url;
@@ -56,6 +56,7 @@ use crate::client::{
};
use crate::config::ConfigValue;
use crate::multipart::{PartId, PutPart, WriteMultiPart};
+use crate::signer::Signer;
use crate::{
ClientOptions, GetOptions, GetResult, ListResult, MultipartId, ObjectMeta,
ObjectStore, Path, Result, RetryConfig,
@@ -209,6 +210,65 @@ impl AmazonS3 {
pub fn credentials(&self) -> &AwsCredentialProvider {
&self.client.config().credentials
}
+
+ /// Create a full URL to the resource specified by `path` with this instance's configuration.
+ fn path_url(&self, path: &Path) -> String {
+ self.client.config().path_url(path)
+ }
+}
+
+#[async_trait]
+impl Signer for AmazonS3 {
+ /// Create a URL containing the relevant [AWS SigV4] query parameters that authorize a request
+ /// via `method` to the resource at `path` valid for the duration specified in `expires_in`.
+ ///
+ /// [AWS SigV4]: https://docs.aws.amazon.com/IAM/latest/UserGuide/create-signed-request.html
+ ///
+ /// # Example
+ ///
+ /// This example returns a URL that will enable a user to upload a file to
+ /// "some-folder/some-file.txt" in the next hour.
+ ///
+ /// ```
+ /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
+ /// # use object_store::{aws::AmazonS3Builder, path::Path, signer::Signer};
+ /// # use reqwest::Method;
+ /// # use std::time::Duration;
+ /// #
+ /// let region = "us-east-1";
+ /// let s3 = AmazonS3Builder::new()
+ /// .with_region(region)
+ /// .with_bucket_name("my-bucket")
+ /// .with_access_key_id("my-access-key-id")
+ /// .with_secret_access_key("my-secret-access-key")
+ /// .build()?;
+ ///
+ /// let url = s3.signed_url(
+ /// Method::PUT,
+ /// &Path::from("some-folder/some-file.txt"),
+ /// Duration::from_secs(60 * 60)
+ /// ).await?;
+ /// # Ok(())
+ /// # }
+ /// ```
+ async fn signed_url(
+ &self,
+ method: Method,
+ path: &Path,
+ expires_in: Duration,
+ ) -> Result<Url> {
+ let credential = self.credentials().get_credential().await?;
+ let authorizer =
+ AwsAuthorizer::new(&credential, "s3",
&self.client.config().region);
+
+ let path_url = self.path_url(path);
+ let mut url =
+ Url::parse(&path_url).context(UnableToParseUrlSnafu { url:
path_url })?;
+
+ authorizer.sign(method, &mut url, expires_in);
+
+ Ok(url)
+ }
}
#[async_trait]
diff --git a/object_store/src/lib.rs b/object_store/src/lib.rs
index 3fd363fd4f..68e785b3a3 100644
--- a/object_store/src/lib.rs
+++ b/object_store/src/lib.rs
@@ -267,6 +267,8 @@ pub mod local;
pub mod memory;
pub mod path;
pub mod prefix;
+#[cfg(feature = "cloud")]
+pub mod signer;
pub mod throttle;
#[cfg(feature = "cloud")]
diff --git a/object_store/src/signer.rs b/object_store/src/signer.rs
new file mode 100644
index 0000000000..f1f35debe0
--- /dev/null
+++ b/object_store/src/signer.rs
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Abstraction of signed URL generation for those object store implementations that support it
+
+use crate::{path::Path, Result};
+use async_trait::async_trait;
+use reqwest::Method;
+use std::{fmt, time::Duration};
+use url::Url;
+
+/// Universal API to presigned URLs generated from multiple object store services. Not supported by
+/// all object store services.
+#[async_trait]
+pub trait Signer: Send + Sync + fmt::Debug + 'static {
+ /// Given the intended [`Method`] and [`Path`] to use and the desired length of time for which
+ /// the URL should be valid, return a signed [`Url`] created with the object store
+ /// implementation's credentials such that the URL can be handed to something that doesn't have
+ /// access to the object store's credentials, to allow limited access to the object store.
+ async fn signed_url(
+ &self,
+ method: Method,
+ path: &Path,
+ expires_in: Duration,
+ ) -> Result<Url>;
+}