This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 7feb5425e6b Add more attributes (#5690)
7feb5425e6b is described below
commit 7feb5425e6b5d251ac1ec94c7b102a6e64d9b496
Author: nett_hier <[email protected]>
AuthorDate: Fri Apr 26 12:40:28 2024 +0200
Add more attributes (#5690)
Signed-off-by: netthier <[email protected]>
---
object_store/src/attributes.rs | 36 ++++++++++++++++++++---
object_store/src/aws/client.rs | 10 +++++--
object_store/src/azure/client.rs | 9 ++++++
object_store/src/client/get.rs | 62 ++++++++++++++++++++++++++++++++++------
object_store/src/gcp/client.rs | 8 +++++-
object_store/src/http/client.rs | 11 +++++--
object_store/src/lib.rs | 8 +++++-
7 files changed, 125 insertions(+), 19 deletions(-)
diff --git a/object_store/src/attributes.rs b/object_store/src/attributes.rs
index 9b90b532585..ecef32eb48a 100644
--- a/object_store/src/attributes.rs
+++ b/object_store/src/attributes.rs
@@ -23,6 +23,18 @@ use std::ops::Deref;
#[non_exhaustive]
#[derive(Debug, Hash, Eq, PartialEq, Clone)]
pub enum Attribute {
+ /// Specifies how the object should be handled by a browser
+ ///
+ /// See [Content-Disposition](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Disposition)
+ ContentDisposition,
+ /// Specifies the encodings applied to the object
+ ///
+ /// See [Content-Encoding](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Encoding)
+ ContentEncoding,
+ /// Specifies the language of the object
+ ///
+ /// See [Content-Language](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Language)
+ ContentLanguage,
/// Specifies the MIME type of the object
///
/// This takes precedence over any [ClientOptions](crate::ClientOptions) configuration
@@ -177,12 +189,15 @@ mod tests {
#[test]
fn test_attributes_basic() {
let mut attributes = Attributes::from_iter([
+ (Attribute::ContentDisposition, "inline"),
+ (Attribute::ContentEncoding, "gzip"),
+ (Attribute::ContentLanguage, "en-US"),
(Attribute::ContentType, "test"),
(Attribute::CacheControl, "control"),
]);
assert!(!attributes.is_empty());
- assert_eq!(attributes.len(), 2);
+ assert_eq!(attributes.len(), 5);
assert_eq!(
attributes.get(&Attribute::ContentType),
@@ -195,17 +210,30 @@ mod tests {
attributes.insert(Attribute::CacheControl, "v1".into()),
Some(metav)
);
- assert_eq!(attributes.len(), 2);
+ assert_eq!(attributes.len(), 5);
assert_eq!(
attributes.remove(&Attribute::CacheControl).unwrap(),
"v1".into()
);
- assert_eq!(attributes.len(), 1);
+ assert_eq!(attributes.len(), 4);
let metav: AttributeValue = "v2".into();
attributes.insert(Attribute::CacheControl, metav.clone());
assert_eq!(attributes.get(&Attribute::CacheControl), Some(&metav));
- assert_eq!(attributes.len(), 2);
+ assert_eq!(attributes.len(), 5);
+
+ assert_eq!(
+ attributes.get(&Attribute::ContentDisposition),
+ Some(&"inline".into())
+ );
+ assert_eq!(
+ attributes.get(&Attribute::ContentEncoding),
+ Some(&"gzip".into())
+ );
+ assert_eq!(
+ attributes.get(&Attribute::ContentLanguage),
+ Some(&"en-US".into())
+ );
}
}
diff --git a/object_store/src/aws/client.rs b/object_store/src/aws/client.rs
index 4a4dc178d5b..24247688e86 100644
--- a/object_store/src/aws/client.rs
+++ b/object_store/src/aws/client.rs
@@ -42,14 +42,17 @@ use async_trait::async_trait;
use base64::prelude::BASE64_STANDARD;
use base64::Engine;
use bytes::{Buf, Bytes};
-use hyper::header::{CACHE_CONTROL, CONTENT_LENGTH};
+use hyper::header::{
+ CACHE_CONTROL, CONTENT_DISPOSITION, CONTENT_ENCODING, CONTENT_LANGUAGE,
CONTENT_LENGTH,
+ CONTENT_TYPE,
+};
use hyper::http::HeaderName;
use hyper::{http, HeaderMap};
use itertools::Itertools;
use md5::{Digest, Md5};
use percent_encoding::{utf8_percent_encode, PercentEncode};
use quick_xml::events::{self as xml_events};
-use reqwest::{header::CONTENT_TYPE, Client as ReqwestClient, Method,
RequestBuilder, Response};
+use reqwest::{Client as ReqwestClient, Method, RequestBuilder, Response};
use ring::digest;
use ring::digest::Context;
use serde::{Deserialize, Serialize};
@@ -322,6 +325,9 @@ impl<'a> Request<'a> {
for (k, v) in &attributes {
builder = match k {
Attribute::CacheControl => builder.header(CACHE_CONTROL,
v.as_ref()),
+ Attribute::ContentDisposition =>
builder.header(CONTENT_DISPOSITION, v.as_ref()),
+ Attribute::ContentEncoding => builder.header(CONTENT_ENCODING,
v.as_ref()),
+ Attribute::ContentLanguage => builder.header(CONTENT_LANGUAGE,
v.as_ref()),
Attribute::ContentType => {
has_content_type = true;
builder.header(CONTENT_TYPE, v.as_ref())
diff --git a/object_store/src/azure/client.rs b/object_store/src/azure/client.rs
index 918fcd047ae..311bd72ff52 100644
--- a/object_store/src/azure/client.rs
+++ b/object_store/src/azure/client.rs
@@ -50,6 +50,10 @@ use url::Url;
const VERSION_HEADER: &str = "x-ms-version-id";
static MS_CACHE_CONTROL: HeaderName =
HeaderName::from_static("x-ms-blob-cache-control");
static MS_CONTENT_TYPE: HeaderName =
HeaderName::from_static("x-ms-blob-content-type");
+static MS_CONTENT_DISPOSITION: HeaderName =
+ HeaderName::from_static("x-ms-blob-content-disposition");
+static MS_CONTENT_ENCODING: HeaderName =
HeaderName::from_static("x-ms-blob-content-encoding");
+static MS_CONTENT_LANGUAGE: HeaderName =
HeaderName::from_static("x-ms-blob-content-language");
static TAGS_HEADER: HeaderName = HeaderName::from_static("x-ms-tags");
@@ -206,6 +210,11 @@ impl<'a> PutRequest<'a> {
for (k, v) in &attributes {
builder = match k {
Attribute::CacheControl => builder.header(&MS_CACHE_CONTROL,
v.as_ref()),
+ Attribute::ContentDisposition => {
+ builder.header(&MS_CONTENT_DISPOSITION, v.as_ref())
+ }
+ Attribute::ContentEncoding =>
builder.header(&MS_CONTENT_ENCODING, v.as_ref()),
+ Attribute::ContentLanguage =>
builder.header(&MS_CONTENT_LANGUAGE, v.as_ref()),
Attribute::ContentType => {
has_content_type = true;
builder.header(&MS_CONTENT_TYPE, v.as_ref())
diff --git a/object_store/src/client/get.rs b/object_store/src/client/get.rs
index f700457611f..430b87b1ba3 100644
--- a/object_store/src/client/get.rs
+++ b/object_store/src/client/get.rs
@@ -22,7 +22,10 @@ use crate::path::Path;
use crate::{Attribute, Attributes, GetOptions, GetRange, GetResult,
GetResultPayload, Result};
use async_trait::async_trait;
use futures::{StreamExt, TryStreamExt};
-use hyper::header::{CACHE_CONTROL, CONTENT_RANGE, CONTENT_TYPE};
+use hyper::header::{
+ CACHE_CONTROL, CONTENT_DISPOSITION, CONTENT_ENCODING, CONTENT_LANGUAGE,
CONTENT_RANGE,
+ CONTENT_TYPE,
+};
use hyper::StatusCode;
use reqwest::header::ToStrError;
use reqwest::Response;
@@ -120,6 +123,15 @@ enum GetResultError {
#[snafu(display("Cache-Control header contained non UTF-8 characters"))]
InvalidCacheControl { source: ToStrError },
+ #[snafu(display("Content-Disposition header contained non UTF-8
characters"))]
+ InvalidContentDisposition { source: ToStrError },
+
+ #[snafu(display("Content-Encoding header contained non UTF-8 characters"))]
+ InvalidContentEncoding { source: ToStrError },
+
+ #[snafu(display("Content-Language header contained non UTF-8 characters"))]
+ InvalidContentLanguage { source: ToStrError },
+
#[snafu(display("Content-Type header contained non UTF-8 characters"))]
InvalidContentType { source: ToStrError },
@@ -167,16 +179,48 @@ fn get_result<T: GetClient>(
0..meta.size
};
- let mut attributes = Attributes::new();
- if let Some(x) = response.headers().get(CACHE_CONTROL) {
- let x = x.to_str().context(InvalidCacheControlSnafu)?;
- attributes.insert(Attribute::CacheControl, x.to_string().into());
- }
- if let Some(x) = response.headers().get(CONTENT_TYPE) {
- let x = x.to_str().context(InvalidContentTypeSnafu)?;
- attributes.insert(Attribute::ContentType, x.to_string().into());
+ macro_rules! parse_attributes {
+ ($headers:expr, $(($header:expr, $attr:expr, $err:expr)),*) => {{
+ let mut attributes = Attributes::new();
+ $(
+ if let Some(x) = $headers.get($header) {
+ let x = x.to_str().context($err)?;
+ attributes.insert($attr, x.to_string().into());
+ }
+ )*
+ attributes
+ }}
}
+ let attributes = parse_attributes!(
+ response.headers(),
+ (
+ CACHE_CONTROL,
+ Attribute::CacheControl,
+ InvalidCacheControlSnafu
+ ),
+ (
+ CONTENT_DISPOSITION,
+ Attribute::ContentDisposition,
+ InvalidContentDispositionSnafu
+ ),
+ (
+ CONTENT_ENCODING,
+ Attribute::ContentEncoding,
+ InvalidContentEncodingSnafu
+ ),
+ (
+ CONTENT_LANGUAGE,
+ Attribute::ContentLanguage,
+ InvalidContentLanguageSnafu
+ ),
+ (
+ CONTENT_TYPE,
+ Attribute::ContentType,
+ InvalidContentTypeSnafu
+ )
+ );
+
let stream = response
.bytes_stream()
.map_err(|source| crate::Error::Generic {
diff --git a/object_store/src/gcp/client.rs b/object_store/src/gcp/client.rs
index 9c39efe6b23..a5493256546 100644
--- a/object_store/src/gcp/client.rs
+++ b/object_store/src/gcp/client.rs
@@ -36,7 +36,10 @@ use async_trait::async_trait;
use base64::prelude::BASE64_STANDARD;
use base64::Engine;
use bytes::Buf;
-use hyper::header::{CACHE_CONTROL, CONTENT_LENGTH, CONTENT_TYPE};
+use hyper::header::{
+ CACHE_CONTROL, CONTENT_DISPOSITION, CONTENT_ENCODING, CONTENT_LANGUAGE,
CONTENT_LENGTH,
+ CONTENT_TYPE,
+};
use percent_encoding::{percent_encode, utf8_percent_encode, NON_ALPHANUMERIC};
use reqwest::header::HeaderName;
use reqwest::{Client, Method, RequestBuilder, Response, StatusCode};
@@ -195,6 +198,9 @@ impl<'a> Request<'a> {
for (k, v) in &attributes {
builder = match k {
Attribute::CacheControl => builder.header(CACHE_CONTROL,
v.as_ref()),
+ Attribute::ContentDisposition =>
builder.header(CONTENT_DISPOSITION, v.as_ref()),
+ Attribute::ContentEncoding => builder.header(CONTENT_ENCODING,
v.as_ref()),
+ Attribute::ContentLanguage => builder.header(CONTENT_LANGUAGE,
v.as_ref()),
Attribute::ContentType => {
has_content_type = true;
builder.header(CONTENT_TYPE, v.as_ref())
diff --git a/object_store/src/http/client.rs b/object_store/src/http/client.rs
index cf259196ba4..4dccef8804b 100644
--- a/object_store/src/http/client.rs
+++ b/object_store/src/http/client.rs
@@ -25,9 +25,11 @@ use crate::{Attribute, Attributes, ClientOptions,
GetOptions, ObjectMeta, PutPay
use async_trait::async_trait;
use bytes::Buf;
use chrono::{DateTime, Utc};
-use hyper::header::{CACHE_CONTROL, CONTENT_LENGTH};
+use hyper::header::{
+ CACHE_CONTROL, CONTENT_DISPOSITION, CONTENT_ENCODING, CONTENT_LANGUAGE,
CONTENT_LENGTH,
+ CONTENT_TYPE,
+};
use percent_encoding::percent_decode_str;
-use reqwest::header::CONTENT_TYPE;
use reqwest::{Method, Response, StatusCode};
use serde::Deserialize;
use snafu::{OptionExt, ResultExt, Snafu};
@@ -172,6 +174,11 @@ impl Client {
for (k, v) in &attributes {
builder = match k {
Attribute::CacheControl => builder.header(CACHE_CONTROL,
v.as_ref()),
+ Attribute::ContentDisposition => {
+ builder.header(CONTENT_DISPOSITION, v.as_ref())
+ }
+ Attribute::ContentEncoding =>
builder.header(CONTENT_ENCODING, v.as_ref()),
+ Attribute::ContentLanguage =>
builder.header(CONTENT_LANGUAGE, v.as_ref()),
Attribute::ContentType => {
has_content_type = true;
builder.header(CONTENT_TYPE, v.as_ref())
diff --git a/object_store/src/lib.rs b/object_store/src/lib.rs
index ad72bd29ef7..c99e15a4933 100644
--- a/object_store/src/lib.rs
+++ b/object_store/src/lib.rs
@@ -1744,8 +1744,14 @@ mod tests {
pub(crate) async fn put_get_attributes(integration: &dyn ObjectStore) {
// Test handling of attributes
let attributes = Attributes::from_iter([
- (Attribute::ContentType, "text/html; charset=utf-8"),
(Attribute::CacheControl, "max-age=604800"),
+ (
+ Attribute::ContentDisposition,
+ r#"attachment; filename="test.html""#,
+ ),
+ (Attribute::ContentEncoding, "gzip"),
+ (Attribute::ContentLanguage, "en-US"),
+ (Attribute::ContentType, "text/html; charset=utf-8"),
]);
let path = Path::from("attributes");