This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 11205a891c Add AWS presigned URL support (#4876)
11205a891c is described below

commit 11205a891c637694165ce40f75e9093729d80342
Author: Carol (Nichols || Goulding) 
<[email protected]>
AuthorDate: Thu Oct 12 10:27:22 2023 -0400

    Add AWS presigned URL support (#4876)
    
    * refactor: Extract AWS algorithm string into a const
    
    * refactor: Extract a string_to_sign function and encapsulate non-reused values
    
    * refactor: Extract a scope function
    
    * refactor: Move hashing of canonical request into string_to_sign
    
    * refactor: Move canonical_request into string_to_sign
    
    * refactor: Move canonical URI construction into string_to_sign
    
    * refactor: Move canonical query construction into string_to_sign
    
    * feat: Implement sign method
    
    * feat: Publicly expose AWS S3 path_url for convenience constructing signed URLs
    
    * docs: Add an example of signing an upload URL
    
    * feat: Add a more convenient API on AmazonS3 for creating signed URLs
    
    * fix: Add credential token to the X-Amz-Security-Token query param if specified
    
    * fix: Change path_url to be pub crate instead of pub
    
    * feat: Define a public Signer trait for the signing interface
    
    * fix: Hide some doc test code
    
    Co-authored-by: Raphael Taylor-Davies 
<[email protected]>
    
    * fix: Use Method through reqwest which re-exports http anyway
    
    ---------
    
    Co-authored-by: Raphael Taylor-Davies 
<[email protected]>
---
 object_store/src/aws/client.rs     |   2 +-
 object_store/src/aws/credential.rs | 181 ++++++++++++++++++++++++++++++-------
 object_store/src/aws/mod.rs        |  64 ++++++++++++-
 object_store/src/lib.rs            |   2 +
 object_store/src/signer.rs         |  40 ++++++++
 5 files changed, 255 insertions(+), 34 deletions(-)

diff --git a/object_store/src/aws/client.rs b/object_store/src/aws/client.rs
index 1c35586f8b..e3ac60eca0 100644
--- a/object_store/src/aws/client.rs
+++ b/object_store/src/aws/client.rs
@@ -212,7 +212,7 @@ pub struct S3Config {
 }
 
 impl S3Config {
-    fn path_url(&self, path: &Path) -> String {
+    pub(crate) fn path_url(&self, path: &Path) -> String {
         format!("{}/{}", self.bucket_endpoint, encode_path(path))
     }
 }
diff --git a/object_store/src/aws/credential.rs b/object_store/src/aws/credential.rs
index be0ffa578d..e27b71f7c4 100644
--- a/object_store/src/aws/credential.rs
+++ b/object_store/src/aws/credential.rs
@@ -30,7 +30,7 @@ use reqwest::{Client, Method, Request, RequestBuilder, StatusCode};
 use serde::Deserialize;
 use std::collections::BTreeMap;
 use std::sync::Arc;
-use std::time::Instant;
+use std::time::{Duration, Instant};
 use tracing::warn;
 use url::Url;
 
@@ -89,6 +89,7 @@ const DATE_HEADER: &str = "x-amz-date";
 const HASH_HEADER: &str = "x-amz-content-sha256";
 const TOKEN_HEADER: &str = "x-amz-security-token";
 const AUTH_HEADER: &str = "authorization";
+const ALGORITHM: &str = "AWS4-HMAC-SHA256";
 
 impl<'a> AwsAuthorizer<'a> {
     /// Create a new [`AwsAuthorizer`]
@@ -154,21 +155,110 @@ impl<'a> AwsAuthorizer<'a> {
         let header_digest = HeaderValue::from_str(&digest).unwrap();
         request.headers_mut().insert(HASH_HEADER, header_digest);
 
-        // Each path segment must be URI-encoded twice (except for Amazon S3 which only gets URI-encoded once).
+        let (signed_headers, canonical_headers) = canonicalize_headers(request.headers());
+
+        let scope = self.scope(date);
+
+        let string_to_sign = self.string_to_sign(
+            date,
+            &scope,
+            request.method(),
+            request.url(),
+            &canonical_headers,
+            &signed_headers,
+            &digest,
+        );
+
+        // sign the string
+        let signature =
+            self.credential
+                .sign(&string_to_sign, date, self.region, self.service);
+
+        // build the actual auth header
+        let authorisation = format!(
+            "{} Credential={}/{}, SignedHeaders={}, Signature={}",
+            ALGORITHM, self.credential.key_id, scope, signed_headers, signature
+        );
+
+        let authorization_val = HeaderValue::from_str(&authorisation).unwrap();
+        request.headers_mut().insert(AUTH_HEADER, authorization_val);
+    }
+
+    pub(crate) fn sign(&self, method: Method, url: &mut Url, expires_in: Duration) {
+        let date = self.date.unwrap_or_else(Utc::now);
+        let scope = self.scope(date);
+
+        // https://docs.aws.amazon.com/AmazonS3/latest/API/sigv4-query-string-auth.html
+        url.query_pairs_mut()
+            .append_pair("X-Amz-Algorithm", ALGORITHM)
+            .append_pair(
+                "X-Amz-Credential",
+                &format!("{}/{}", self.credential.key_id, scope),
+            )
+            .append_pair("X-Amz-Date", &date.format("%Y%m%dT%H%M%SZ").to_string())
+            .append_pair("X-Amz-Expires", &expires_in.as_secs().to_string())
+            .append_pair("X-Amz-SignedHeaders", "host");
+
+        // For S3, you must include the X-Amz-Security-Token query parameter in the URL if
+        // using credentials sourced from the STS service.
+        if let Some(ref token) = self.credential.token {
+            url.query_pairs_mut()
+                .append_pair("X-Amz-Security-Token", token);
+        }
+
+        // We don't have a payload; the user is going to send the payload directly themselves.
+        let digest = UNSIGNED_PAYLOAD;
+
+        let host = &url[url::Position::BeforeHost..url::Position::AfterPort].to_string();
+        let mut headers = HeaderMap::new();
+        let host_val = HeaderValue::from_str(host).unwrap();
+        headers.insert("host", host_val);
+
+        let (signed_headers, canonical_headers) = canonicalize_headers(&headers);
+
+        let string_to_sign = self.string_to_sign(
+            date,
+            &scope,
+            &method,
+            url,
+            &canonical_headers,
+            &signed_headers,
+            digest,
+        );
+
+        let signature =
+            self.credential
+                .sign(&string_to_sign, date, self.region, self.service);
+
+        url.query_pairs_mut()
+            .append_pair("X-Amz-Signature", &signature);
+    }
+
+    #[allow(clippy::too_many_arguments)]
+    fn string_to_sign(
+        &self,
+        date: DateTime<Utc>,
+        scope: &str,
+        request_method: &Method,
+        url: &Url,
+        canonical_headers: &str,
+        signed_headers: &str,
+        digest: &str,
+    ) -> String {
+        // Each path segment must be URI-encoded twice (except for Amazon S3 which only gets
+        // URI-encoded once).
         // see https://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
         let canonical_uri = match self.service {
-            "s3" => request.url().path().to_string(),
-            _ => utf8_percent_encode(request.url().path(), &STRICT_PATH_ENCODE_SET)
-                .to_string(),
+            "s3" => url.path().to_string(),
+            _ => utf8_percent_encode(url.path(), &STRICT_PATH_ENCODE_SET).to_string(),
         };
 
-        let (signed_headers, canonical_headers) = canonicalize_headers(request.headers());
-        let canonical_query = canonicalize_query(request.url());
+        let canonical_query = canonicalize_query(url);
 
         // https://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
         let canonical_request = format!(
             "{}\n{}\n{}\n{}\n{}\n{}",
-            request.method().as_str(),
+            request_method.as_str(),
             canonical_uri,
             canonical_query,
             canonical_headers,
@@ -177,33 +267,23 @@ impl<'a> AwsAuthorizer<'a> {
         );
 
         let hashed_canonical_request = hex_digest(canonical_request.as_bytes());
-        let scope = format!(
-            "{}/{}/{}/aws4_request",
-            date.format("%Y%m%d"),
-            self.region,
-            self.service
-        );
 
-        let string_to_sign = format!(
-            "AWS4-HMAC-SHA256\n{}\n{}\n{}",
+        format!(
+            "{}\n{}\n{}\n{}",
+            ALGORITHM,
             date.format("%Y%m%dT%H%M%SZ"),
             scope,
             hashed_canonical_request
-        );
-
-        // sign the string
-        let signature =
-            self.credential
-                .sign(&string_to_sign, date, self.region, self.service);
-
-        // build the actual auth header
-        let authorisation = format!(
-            "AWS4-HMAC-SHA256 Credential={}/{}, SignedHeaders={}, Signature={}",
-            self.credential.key_id, scope, signed_headers, signature
-        );
+        )
+    }
 
-        let authorization_val = HeaderValue::from_str(&authorisation).unwrap();
-        request.headers_mut().insert(AUTH_HEADER, authorization_val);
+    fn scope(&self, date: DateTime<Utc>) -> String {
+        format!(
+            "{}/{}/{}/aws4_request",
+            date.format("%Y%m%d"),
+            self.region,
+            self.service
+        )
     }
 }
 
@@ -667,7 +747,46 @@ mod tests {
         };
 
         authorizer.authorize(&mut request, None);
-        assert_eq!(request.headers().get(AUTH_HEADER).unwrap(), "AWS4-HMAC-SHA256 Credential=AKIAIOSFODNN7EXAMPLE/20220806/us-east-1/ec2/aws4_request, SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=653c3d8ea261fd826207df58bc2bb69fbb5003e9eb3c0ef06e4a51f2a81d8699")
+        assert_eq!(request.headers().get(AUTH_HEADER).unwrap(), "AWS4-HMAC-SHA256 Credential=AKIAIOSFODNN7EXAMPLE/20220806/us-east-1/ec2/aws4_request, SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=653c3d8ea261fd826207df58bc2bb69fbb5003e9eb3c0ef06e4a51f2a81d8699");
+    }
+
+    #[test]
+    fn signed_get_url() {
+        // Values from https://docs.aws.amazon.com/AmazonS3/latest/API/sigv4-query-string-auth.html
+        let credential = AwsCredential {
+            key_id: "AKIAIOSFODNN7EXAMPLE".to_string(),
+            secret_key: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY".to_string(),
+            token: None,
+        };
+
+        let date = DateTime::parse_from_rfc3339("2013-05-24T00:00:00Z")
+            .unwrap()
+            .with_timezone(&Utc);
+
+        let authorizer = AwsAuthorizer {
+            date: Some(date),
+            credential: &credential,
+            service: "s3",
+            region: "us-east-1",
+            sign_payload: false,
+        };
+
+        let mut url =
+            Url::parse("https://examplebucket.s3.amazonaws.com/test.txt").unwrap();
+        authorizer.sign(Method::GET, &mut url, Duration::from_secs(86400));
+
+        assert_eq!(
+            url,
+            Url::parse(
+                "https://examplebucket.s3.amazonaws.com/test.txt?\
+                X-Amz-Algorithm=AWS4-HMAC-SHA256&\
+                X-Amz-Credential=AKIAIOSFODNN7EXAMPLE%2F20130524%2Fus-east-1%2Fs3%2Faws4_request&\
+                X-Amz-Date=20130524T000000Z&\
+                X-Amz-Expires=86400&\
+                X-Amz-SignedHeaders=host&\
+                X-Amz-Signature=aeeed9bbccd4d02ee5c0109b86d86835f995330da4c265957d157751f604d404"
+            ).unwrap()
+        );
     }
 
     #[test]
diff --git a/object_store/src/aws/mod.rs b/object_store/src/aws/mod.rs
index db3e1b9a4b..0028be99fa 100644
--- a/object_store/src/aws/mod.rs
+++ b/object_store/src/aws/mod.rs
@@ -36,10 +36,10 @@ use bytes::Bytes;
 use futures::stream::BoxStream;
 use futures::{StreamExt, TryStreamExt};
 use itertools::Itertools;
+use reqwest::Method;
 use serde::{Deserialize, Serialize};
 use snafu::{ensure, OptionExt, ResultExt, Snafu};
-use std::str::FromStr;
-use std::sync::Arc;
+use std::{str::FromStr, sync::Arc, time::Duration};
 use tokio::io::AsyncWrite;
 use tracing::info;
 use url::Url;
@@ -56,6 +56,7 @@ use crate::client::{
 };
 use crate::config::ConfigValue;
 use crate::multipart::{PartId, PutPart, WriteMultiPart};
+use crate::signer::Signer;
 use crate::{
     ClientOptions, GetOptions, GetResult, ListResult, MultipartId, ObjectMeta,
     ObjectStore, Path, Result, RetryConfig,
@@ -209,6 +210,65 @@ impl AmazonS3 {
     pub fn credentials(&self) -> &AwsCredentialProvider {
         &self.client.config().credentials
     }
+
+    /// Create a full URL to the resource specified by `path` with this instance's configuration.
+    fn path_url(&self, path: &Path) -> String {
+        self.client.config().path_url(path)
+    }
+}
+
+#[async_trait]
+impl Signer for AmazonS3 {
+    /// Create a URL containing the relevant [AWS SigV4] query parameters that authorize a request
+    /// via `method` to the resource at `path` valid for the duration specified in `expires_in`.
+    ///
+    /// [AWS SigV4]: https://docs.aws.amazon.com/IAM/latest/UserGuide/create-signed-request.html
+    ///
+    /// # Example
+    ///
+    /// This example returns a URL that will enable a user to upload a file to
+    /// "some-folder/some-file.txt" in the next hour.
+    ///
+    /// ```
+    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
+    /// # use object_store::{aws::AmazonS3Builder, path::Path, signer::Signer};
+    /// # use reqwest::Method;
+    /// # use std::time::Duration;
+    /// #
+    /// let region = "us-east-1";
+    /// let s3 = AmazonS3Builder::new()
+    ///     .with_region(region)
+    ///     .with_bucket_name("my-bucket")
+    ///     .with_access_key_id("my-access-key-id")
+    ///     .with_secret_access_key("my-secret-access-key")
+    ///     .build()?;
+    ///
+    /// let url = s3.signed_url(
+    ///     Method::PUT,
+    ///     &Path::from("some-folder/some-file.txt"),
+    ///     Duration::from_secs(60 * 60)
+    /// ).await?;
+    /// #     Ok(())
+    /// # }
+    /// ```
+    async fn signed_url(
+        &self,
+        method: Method,
+        path: &Path,
+        expires_in: Duration,
+    ) -> Result<Url> {
+        let credential = self.credentials().get_credential().await?;
+        let authorizer =
+            AwsAuthorizer::new(&credential, "s3", &self.client.config().region);
+
+        let path_url = self.path_url(path);
+        let mut url =
+            Url::parse(&path_url).context(UnableToParseUrlSnafu { url: path_url })?;
+
+        authorizer.sign(method, &mut url, expires_in);
+
+        Ok(url)
+    }
 }
 
 #[async_trait]
diff --git a/object_store/src/lib.rs b/object_store/src/lib.rs
index 3fd363fd4f..68e785b3a3 100644
--- a/object_store/src/lib.rs
+++ b/object_store/src/lib.rs
@@ -267,6 +267,8 @@ pub mod local;
 pub mod memory;
 pub mod path;
 pub mod prefix;
+#[cfg(feature = "cloud")]
+pub mod signer;
 pub mod throttle;
 
 #[cfg(feature = "cloud")]
diff --git a/object_store/src/signer.rs b/object_store/src/signer.rs
new file mode 100644
index 0000000000..f1f35debe0
--- /dev/null
+++ b/object_store/src/signer.rs
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Abstraction of signed URL generation for those object store implementations that support it
+
+use crate::{path::Path, Result};
+use async_trait::async_trait;
+use reqwest::Method;
+use std::{fmt, time::Duration};
+use url::Url;
+
+/// Universal API to presigned URLs generated from multiple object store services. Not supported by
+/// all object store services.
+#[async_trait]
+pub trait Signer: Send + Sync + fmt::Debug + 'static {
+    /// Given the intended [`Method`] and [`Path`] to use and the desired length of time for which
+    /// the URL should be valid, return a signed [`Url`] created with the object store
+    /// implementation's credentials such that the URL can be handed to something that doesn't have
+    /// access to the object store's credentials, to allow limited access to the object store.
+    async fn signed_url(
+        &self,
+        method: Method,
+        path: &Path,
+        expires_in: Duration,
+    ) -> Result<Url>;
+}

Reply via email to