This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 7feb5425e6b Add more attributes (#5690)
7feb5425e6b is described below

commit 7feb5425e6b5d251ac1ec94c7b102a6e64d9b496
Author: nett_hier <[email protected]>
AuthorDate: Fri Apr 26 12:40:28 2024 +0200

    Add more attributes (#5690)
    
    Signed-off-by: netthier <[email protected]>
---
 object_store/src/attributes.rs   | 36 ++++++++++++++++++++---
 object_store/src/aws/client.rs   | 10 +++++--
 object_store/src/azure/client.rs |  9 ++++++
 object_store/src/client/get.rs   | 62 ++++++++++++++++++++++++++++++++++------
 object_store/src/gcp/client.rs   |  8 +++++-
 object_store/src/http/client.rs  | 11 +++++--
 object_store/src/lib.rs          |  8 +++++-
 7 files changed, 125 insertions(+), 19 deletions(-)

diff --git a/object_store/src/attributes.rs b/object_store/src/attributes.rs
index 9b90b532585..ecef32eb48a 100644
--- a/object_store/src/attributes.rs
+++ b/object_store/src/attributes.rs
@@ -23,6 +23,18 @@ use std::ops::Deref;
 #[non_exhaustive]
 #[derive(Debug, Hash, Eq, PartialEq, Clone)]
 pub enum Attribute {
+    /// Specifies how the object should be handled by a browser
+    ///
+    /// See [Content-Disposition](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Disposition)
+    ContentDisposition,
+    /// Specifies the encodings applied to the object
+    ///
+    /// See [Content-Encoding](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Encoding)
+    ContentEncoding,
+    /// Specifies the language of the object
+    ///
+    /// See [Content-Language](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Language)
+    ContentLanguage,
     /// Specifies the MIME type of the object
     ///
     /// This takes precedence over any [ClientOptions](crate::ClientOptions) configuration
@@ -177,12 +189,15 @@ mod tests {
     #[test]
     fn test_attributes_basic() {
         let mut attributes = Attributes::from_iter([
+            (Attribute::ContentDisposition, "inline"),
+            (Attribute::ContentEncoding, "gzip"),
+            (Attribute::ContentLanguage, "en-US"),
             (Attribute::ContentType, "test"),
             (Attribute::CacheControl, "control"),
         ]);
 
         assert!(!attributes.is_empty());
-        assert_eq!(attributes.len(), 2);
+        assert_eq!(attributes.len(), 5);
 
         assert_eq!(
             attributes.get(&Attribute::ContentType),
@@ -195,17 +210,30 @@ mod tests {
             attributes.insert(Attribute::CacheControl, "v1".into()),
             Some(metav)
         );
-        assert_eq!(attributes.len(), 2);
+        assert_eq!(attributes.len(), 5);
 
         assert_eq!(
             attributes.remove(&Attribute::CacheControl).unwrap(),
             "v1".into()
         );
-        assert_eq!(attributes.len(), 1);
+        assert_eq!(attributes.len(), 4);
 
         let metav: AttributeValue = "v2".into();
         attributes.insert(Attribute::CacheControl, metav.clone());
         assert_eq!(attributes.get(&Attribute::CacheControl), Some(&metav));
-        assert_eq!(attributes.len(), 2);
+        assert_eq!(attributes.len(), 5);
+
+        assert_eq!(
+            attributes.get(&Attribute::ContentDisposition),
+            Some(&"inline".into())
+        );
+        assert_eq!(
+            attributes.get(&Attribute::ContentEncoding),
+            Some(&"gzip".into())
+        );
+        assert_eq!(
+            attributes.get(&Attribute::ContentLanguage),
+            Some(&"en-US".into())
+        );
     }
 }
diff --git a/object_store/src/aws/client.rs b/object_store/src/aws/client.rs
index 4a4dc178d5b..24247688e86 100644
--- a/object_store/src/aws/client.rs
+++ b/object_store/src/aws/client.rs
@@ -42,14 +42,17 @@ use async_trait::async_trait;
 use base64::prelude::BASE64_STANDARD;
 use base64::Engine;
 use bytes::{Buf, Bytes};
-use hyper::header::{CACHE_CONTROL, CONTENT_LENGTH};
+use hyper::header::{
+    CACHE_CONTROL, CONTENT_DISPOSITION, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH,
+    CONTENT_TYPE,
+};
 use hyper::http::HeaderName;
 use hyper::{http, HeaderMap};
 use itertools::Itertools;
 use md5::{Digest, Md5};
 use percent_encoding::{utf8_percent_encode, PercentEncode};
 use quick_xml::events::{self as xml_events};
-use reqwest::{header::CONTENT_TYPE, Client as ReqwestClient, Method, RequestBuilder, Response};
+use reqwest::{Client as ReqwestClient, Method, RequestBuilder, Response};
 use ring::digest;
 use ring::digest::Context;
 use serde::{Deserialize, Serialize};
@@ -322,6 +325,9 @@ impl<'a> Request<'a> {
         for (k, v) in &attributes {
             builder = match k {
                 Attribute::CacheControl => builder.header(CACHE_CONTROL, v.as_ref()),
+                Attribute::ContentDisposition => builder.header(CONTENT_DISPOSITION, v.as_ref()),
+                Attribute::ContentEncoding => builder.header(CONTENT_ENCODING, v.as_ref()),
+                Attribute::ContentLanguage => builder.header(CONTENT_LANGUAGE, v.as_ref()),
                 Attribute::ContentType => {
                     has_content_type = true;
                     builder.header(CONTENT_TYPE, v.as_ref())
diff --git a/object_store/src/azure/client.rs b/object_store/src/azure/client.rs
index 918fcd047ae..311bd72ff52 100644
--- a/object_store/src/azure/client.rs
+++ b/object_store/src/azure/client.rs
@@ -50,6 +50,10 @@ use url::Url;
 const VERSION_HEADER: &str = "x-ms-version-id";
 static MS_CACHE_CONTROL: HeaderName = HeaderName::from_static("x-ms-blob-cache-control");
 static MS_CONTENT_TYPE: HeaderName = HeaderName::from_static("x-ms-blob-content-type");
+static MS_CONTENT_DISPOSITION: HeaderName =
+    HeaderName::from_static("x-ms-blob-content-disposition");
+static MS_CONTENT_ENCODING: HeaderName = HeaderName::from_static("x-ms-blob-content-encoding");
+static MS_CONTENT_LANGUAGE: HeaderName = HeaderName::from_static("x-ms-blob-content-language");
 
 static TAGS_HEADER: HeaderName = HeaderName::from_static("x-ms-tags");
 
@@ -206,6 +210,11 @@ impl<'a> PutRequest<'a> {
         for (k, v) in &attributes {
             builder = match k {
                 Attribute::CacheControl => builder.header(&MS_CACHE_CONTROL, v.as_ref()),
+                Attribute::ContentDisposition => {
+                    builder.header(&MS_CONTENT_DISPOSITION, v.as_ref())
+                }
+                Attribute::ContentEncoding => builder.header(&MS_CONTENT_ENCODING, v.as_ref()),
+                Attribute::ContentLanguage => builder.header(&MS_CONTENT_LANGUAGE, v.as_ref()),
                 Attribute::ContentType => {
                     has_content_type = true;
                     builder.header(&MS_CONTENT_TYPE, v.as_ref())
diff --git a/object_store/src/client/get.rs b/object_store/src/client/get.rs
index f700457611f..430b87b1ba3 100644
--- a/object_store/src/client/get.rs
+++ b/object_store/src/client/get.rs
@@ -22,7 +22,10 @@ use crate::path::Path;
 use crate::{Attribute, Attributes, GetOptions, GetRange, GetResult, GetResultPayload, Result};
 use async_trait::async_trait;
 use futures::{StreamExt, TryStreamExt};
-use hyper::header::{CACHE_CONTROL, CONTENT_RANGE, CONTENT_TYPE};
+use hyper::header::{
+    CACHE_CONTROL, CONTENT_DISPOSITION, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_RANGE,
+    CONTENT_TYPE,
+};
 use hyper::StatusCode;
 use reqwest::header::ToStrError;
 use reqwest::Response;
@@ -120,6 +123,15 @@ enum GetResultError {
     #[snafu(display("Cache-Control header contained non UTF-8 characters"))]
     InvalidCacheControl { source: ToStrError },
 
+    #[snafu(display("Content-Disposition header contained non UTF-8 characters"))]
+    InvalidContentDisposition { source: ToStrError },
+
+    #[snafu(display("Content-Encoding header contained non UTF-8 characters"))]
+    InvalidContentEncoding { source: ToStrError },
+
+    #[snafu(display("Content-Language header contained non UTF-8 characters"))]
+    InvalidContentLanguage { source: ToStrError },
+
     #[snafu(display("Content-Type header contained non UTF-8 characters"))]
     InvalidContentType { source: ToStrError },
 
@@ -167,16 +179,48 @@ fn get_result<T: GetClient>(
         0..meta.size
     };
 
-    let mut attributes = Attributes::new();
-    if let Some(x) = response.headers().get(CACHE_CONTROL) {
-        let x = x.to_str().context(InvalidCacheControlSnafu)?;
-        attributes.insert(Attribute::CacheControl, x.to_string().into());
-    }
-    if let Some(x) = response.headers().get(CONTENT_TYPE) {
-        let x = x.to_str().context(InvalidContentTypeSnafu)?;
-        attributes.insert(Attribute::ContentType, x.to_string().into());
+    macro_rules! parse_attributes {
+        ($headers:expr, $(($header:expr, $attr:expr, $err:expr)),*) => {{
+            let mut attributes = Attributes::new();
+            $(
+            if let Some(x) = $headers.get($header) {
+                let x = x.to_str().context($err)?;
+                attributes.insert($attr, x.to_string().into());
+            }
+            )*
+            attributes
+        }}
     }
 
+    let attributes = parse_attributes!(
+        response.headers(),
+        (
+            CACHE_CONTROL,
+            Attribute::CacheControl,
+            InvalidCacheControlSnafu
+        ),
+        (
+            CONTENT_DISPOSITION,
+            Attribute::ContentDisposition,
+            InvalidContentDispositionSnafu
+        ),
+        (
+            CONTENT_ENCODING,
+            Attribute::ContentEncoding,
+            InvalidContentEncodingSnafu
+        ),
+        (
+            CONTENT_LANGUAGE,
+            Attribute::ContentLanguage,
+            InvalidContentLanguageSnafu
+        ),
+        (
+            CONTENT_TYPE,
+            Attribute::ContentType,
+            InvalidContentTypeSnafu
+        )
+    );
+
     let stream = response
         .bytes_stream()
         .map_err(|source| crate::Error::Generic {
diff --git a/object_store/src/gcp/client.rs b/object_store/src/gcp/client.rs
index 9c39efe6b23..a5493256546 100644
--- a/object_store/src/gcp/client.rs
+++ b/object_store/src/gcp/client.rs
@@ -36,7 +36,10 @@ use async_trait::async_trait;
 use base64::prelude::BASE64_STANDARD;
 use base64::Engine;
 use bytes::Buf;
-use hyper::header::{CACHE_CONTROL, CONTENT_LENGTH, CONTENT_TYPE};
+use hyper::header::{
+    CACHE_CONTROL, CONTENT_DISPOSITION, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH,
+    CONTENT_TYPE,
+};
 use percent_encoding::{percent_encode, utf8_percent_encode, NON_ALPHANUMERIC};
 use reqwest::header::HeaderName;
 use reqwest::{Client, Method, RequestBuilder, Response, StatusCode};
@@ -195,6 +198,9 @@ impl<'a> Request<'a> {
         for (k, v) in &attributes {
             builder = match k {
                 Attribute::CacheControl => builder.header(CACHE_CONTROL, v.as_ref()),
+                Attribute::ContentDisposition => builder.header(CONTENT_DISPOSITION, v.as_ref()),
+                Attribute::ContentEncoding => builder.header(CONTENT_ENCODING, v.as_ref()),
+                Attribute::ContentLanguage => builder.header(CONTENT_LANGUAGE, v.as_ref()),
                 Attribute::ContentType => {
                     has_content_type = true;
                     builder.header(CONTENT_TYPE, v.as_ref())
diff --git a/object_store/src/http/client.rs b/object_store/src/http/client.rs
index cf259196ba4..4dccef8804b 100644
--- a/object_store/src/http/client.rs
+++ b/object_store/src/http/client.rs
@@ -25,9 +25,11 @@ use crate::{Attribute, Attributes, ClientOptions, GetOptions, ObjectMeta, PutPay
 use async_trait::async_trait;
 use bytes::Buf;
 use chrono::{DateTime, Utc};
-use hyper::header::{CACHE_CONTROL, CONTENT_LENGTH};
+use hyper::header::{
+    CACHE_CONTROL, CONTENT_DISPOSITION, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH,
+    CONTENT_TYPE,
+};
 use percent_encoding::percent_decode_str;
-use reqwest::header::CONTENT_TYPE;
 use reqwest::{Method, Response, StatusCode};
 use serde::Deserialize;
 use snafu::{OptionExt, ResultExt, Snafu};
@@ -172,6 +174,11 @@ impl Client {
             for (k, v) in &attributes {
                 builder = match k {
                     Attribute::CacheControl => builder.header(CACHE_CONTROL, v.as_ref()),
+                    Attribute::ContentDisposition => {
+                        builder.header(CONTENT_DISPOSITION, v.as_ref())
+                    }
+                    Attribute::ContentEncoding => builder.header(CONTENT_ENCODING, v.as_ref()),
+                    Attribute::ContentLanguage => builder.header(CONTENT_LANGUAGE, v.as_ref()),
                     Attribute::ContentType => {
                         has_content_type = true;
                         builder.header(CONTENT_TYPE, v.as_ref())
diff --git a/object_store/src/lib.rs b/object_store/src/lib.rs
index ad72bd29ef7..c99e15a4933 100644
--- a/object_store/src/lib.rs
+++ b/object_store/src/lib.rs
@@ -1744,8 +1744,14 @@ mod tests {
     pub(crate) async fn put_get_attributes(integration: &dyn ObjectStore) {
         // Test handling of attributes
         let attributes = Attributes::from_iter([
-            (Attribute::ContentType, "text/html; charset=utf-8"),
             (Attribute::CacheControl, "max-age=604800"),
+            (
+                Attribute::ContentDisposition,
+                r#"attachment; filename="test.html""#,
+            ),
+            (Attribute::ContentEncoding, "gzip"),
+            (Attribute::ContentLanguage, "en-US"),
+            (Attribute::ContentType, "text/html; charset=utf-8"),
         ]);
 
         let path = Path::from("attributes");

Reply via email to