This is an automated email from the ASF dual-hosted git repository.

xuanwo pushed a commit to branch fix-stat
in repository https://gitbox.apache.org/repos/asf/incubator-opendal.git
commit 501b7187e2762bd2b4dc71c2f97966cc32f0c6bd Author: Xuanwo <[email protected]> AuthorDate: Tue Nov 21 19:19:39 2023 +0800 Fix azblob Signed-off-by: Xuanwo <[email protected]> --- core/src/services/azblob/backend.rs | 28 ++++- core/src/services/azblob/core.rs | 230 +++++++++++++++++++++++++++++++++++ core/src/services/azblob/lister.rs | 236 +----------------------------------- core/src/services/s3/backend.rs | 32 +++-- 4 files changed, 272 insertions(+), 254 deletions(-) diff --git a/core/src/services/azblob/backend.rs b/core/src/services/azblob/backend.rs index b3e8caad9..42548154c 100644 --- a/core/src/services/azblob/backend.rs +++ b/core/src/services/azblob/backend.rs @@ -23,6 +23,7 @@ use std::sync::Arc; use async_trait::async_trait; use base64::prelude::BASE64_STANDARD; use base64::Engine; +use bytes::Buf; use http::header::CONTENT_TYPE; use http::StatusCode; use log::debug; @@ -37,7 +38,7 @@ use super::error::parse_error; use super::lister::AzblobLister; use super::writer::AzblobWriter; use crate::raw::*; -use crate::services::azblob::core::AzblobCore; +use crate::services::azblob::core::{AzblobCore, ListBlobsOutput}; use crate::services::azblob::writer::AzblobWriters; use crate::types::Metadata; use crate::*; @@ -664,15 +665,34 @@ impl Accessor for AzblobBackend { return Ok(RpStat::new(Metadata::new(EntryMode::DIR))); } + if path.ends_with('/') { + let resp = self.core.azblob_list_blobs(path, "", "", Some(1)).await?; + + if resp.status() != StatusCode::OK { + return Err(parse_error(resp).await?); + } + + let bs = resp.into_body().bytes().await?; + + let output: ListBlobsOutput = + quick_xml::de::from_reader(bs.reader()).map_err(new_xml_deserialize_error)?; + + return if !output.blobs.blob.is_empty() { + Ok(RpStat::new(Metadata::new(EntryMode::DIR))) + } else { + Err( + Error::new(ErrorKind::NotFound, "The directory is not exist") + .with_context("path", path), + ) + }; + } + let resp = self.core.azblob_get_blob_properties(path, &args).await?; let status 
= resp.status(); match status { StatusCode::OK => parse_into_metadata(path, resp.headers()).map(RpStat::new), - StatusCode::NOT_FOUND if path.ends_with('/') => { - Ok(RpStat::new(Metadata::new(EntryMode::DIR))) - } _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/azblob/core.rs b/core/src/services/azblob/core.rs index 7db055660..63a003f2d 100644 --- a/core/src/services/azblob/core.rs +++ b/core/src/services/azblob/core.rs @@ -32,6 +32,7 @@ use http::Response; use reqsign::AzureStorageCredential; use reqsign::AzureStorageLoader; use reqsign::AzureStorageSigner; +use serde::Deserialize; use crate::raw::*; use crate::*; @@ -531,3 +532,232 @@ impl AzblobCore { self.send(req).await } } + +#[derive(Default, Debug, Deserialize)] +#[serde(default, rename_all = "PascalCase")] +pub struct ListBlobsOutput { + pub blobs: Blobs, + #[serde(rename = "NextMarker")] + pub next_marker: Option<String>, +} + +#[derive(Default, Debug, Deserialize)] +#[serde(default, rename_all = "PascalCase")] +pub struct Blobs { + pub blob: Vec<Blob>, + pub blob_prefix: Vec<BlobPrefix>, +} + +#[derive(Default, Debug, Deserialize)] +#[serde(default, rename_all = "PascalCase")] +pub struct BlobPrefix { + pub name: String, +} + +#[derive(Default, Debug, Deserialize)] +#[serde(default, rename_all = "PascalCase")] +pub struct Blob { + pub properties: Properties, + pub name: String, +} + +#[derive(Default, Debug, Deserialize)] +#[serde(default, rename_all = "PascalCase")] +pub struct Properties { + #[serde(rename = "Content-Length")] + pub content_length: u64, + #[serde(rename = "Last-Modified")] + pub last_modified: String, + #[serde(rename = "Content-MD5")] + pub content_md5: String, + #[serde(rename = "Content-Type")] + pub content_type: String, + pub etag: String, +} + +#[cfg(test)] +mod tests { + use bytes::{Buf, Bytes}; + + use super::*; + use quick_xml::de; + + #[test] + fn test_parse_xml() { + let bs = bytes::Bytes::from( + r#" + <?xml version="1.0" encoding="utf-8"?> + 
<EnumerationResults ServiceEndpoint="https://test.blob.core.windows.net/" ContainerName="myazurebucket"> + <Prefix>dir1/</Prefix> + <Delimiter>/</Delimiter> + <Blobs> + <Blob> + <Name>dir1/2f018bb5-466f-4af1-84fa-2b167374ee06</Name> + <Properties> + <Creation-Time>Sun, 20 Mar 2022 11:29:03 GMT</Creation-Time> + <Last-Modified>Sun, 20 Mar 2022 11:29:03 GMT</Last-Modified> + <Etag>0x8DA0A64D66790C3</Etag> + <Content-Length>3485277</Content-Length> + <Content-Type>application/octet-stream</Content-Type> + <Content-Encoding /> + <Content-Language /> + <Content-CRC64 /> + <Content-MD5>llJ/+jOlx5GdA1sL7SdKuw==</Content-MD5> + <Cache-Control /> + <Content-Disposition /> + <BlobType>BlockBlob</BlobType> + <AccessTier>Hot</AccessTier> + <AccessTierInferred>true</AccessTierInferred> + <LeaseStatus>unlocked</LeaseStatus> + <LeaseState>available</LeaseState> + <ServerEncrypted>true</ServerEncrypted> + </Properties> + <OrMetadata /> + </Blob> + <Blob> + <Name>dir1/5b9432b2-79c0-48d8-90c2-7d3e153826ed</Name> + <Properties> + <Creation-Time>Tue, 29 Mar 2022 01:54:07 GMT</Creation-Time> + <Last-Modified>Tue, 29 Mar 2022 01:54:07 GMT</Last-Modified> + <Etag>0x8DA112702D88FE4</Etag> + <Content-Length>2471869</Content-Length> + <Content-Type>application/octet-stream</Content-Type> + <Content-Encoding /> + <Content-Language /> + <Content-CRC64 /> + <Content-MD5>xmgUltSnopLSJOukgCHFtg==</Content-MD5> + <Cache-Control /> + <Content-Disposition /> + <BlobType>BlockBlob</BlobType> + <AccessTier>Hot</AccessTier> + <AccessTierInferred>true</AccessTierInferred> + <LeaseStatus>unlocked</LeaseStatus> + <LeaseState>available</LeaseState> + <ServerEncrypted>true</ServerEncrypted> + </Properties> + <OrMetadata /> + </Blob> + <Blob> + <Name>dir1/b2d96f8b-d467-40d1-bb11-4632dddbf5b5</Name> + <Properties> + <Creation-Time>Sun, 20 Mar 2022 11:31:57 GMT</Creation-Time> + <Last-Modified>Sun, 20 Mar 2022 11:31:57 GMT</Last-Modified> + <Etag>0x8DA0A653DC82981</Etag> + 
<Content-Length>1259677</Content-Length> + <Content-Type>application/octet-stream</Content-Type> + <Content-Encoding /> + <Content-Language /> + <Content-CRC64 /> + <Content-MD5>AxTiFXHwrXKaZC5b7ZRybw==</Content-MD5> + <Cache-Control /> + <Content-Disposition /> + <BlobType>BlockBlob</BlobType> + <AccessTier>Hot</AccessTier> + <AccessTierInferred>true</AccessTierInferred> + <LeaseStatus>unlocked</LeaseStatus> + <LeaseState>available</LeaseState> + <ServerEncrypted>true</ServerEncrypted> + </Properties> + <OrMetadata /> + </Blob> + <BlobPrefix> + <Name>dir1/dir2/</Name> + </BlobPrefix> + <BlobPrefix> + <Name>dir1/dir21/</Name> + </BlobPrefix> + </Blobs> + <NextMarker /> + </EnumerationResults>"#, + ); + let out: ListBlobsOutput = de::from_reader(bs.reader()).expect("must success"); + println!("{out:?}"); + + assert_eq!( + out.blobs + .blob + .iter() + .map(|v| v.name.clone()) + .collect::<Vec<String>>(), + [ + "dir1/2f018bb5-466f-4af1-84fa-2b167374ee06", + "dir1/5b9432b2-79c0-48d8-90c2-7d3e153826ed", + "dir1/b2d96f8b-d467-40d1-bb11-4632dddbf5b5" + ] + ); + assert_eq!( + out.blobs + .blob + .iter() + .map(|v| v.properties.content_length) + .collect::<Vec<u64>>(), + [3485277, 2471869, 1259677] + ); + assert_eq!( + out.blobs + .blob + .iter() + .map(|v| v.properties.content_md5.clone()) + .collect::<Vec<String>>(), + [ + "llJ/+jOlx5GdA1sL7SdKuw==".to_string(), + "xmgUltSnopLSJOukgCHFtg==".to_string(), + "AxTiFXHwrXKaZC5b7ZRybw==".to_string() + ] + ); + assert_eq!( + out.blobs + .blob + .iter() + .map(|v| v.properties.last_modified.clone()) + .collect::<Vec<String>>(), + [ + "Sun, 20 Mar 2022 11:29:03 GMT".to_string(), + "Tue, 29 Mar 2022 01:54:07 GMT".to_string(), + "Sun, 20 Mar 2022 11:31:57 GMT".to_string() + ] + ); + assert_eq!( + out.blobs + .blob + .iter() + .map(|v| v.properties.etag.clone()) + .collect::<Vec<String>>(), + [ + "0x8DA0A64D66790C3".to_string(), + "0x8DA112702D88FE4".to_string(), + "0x8DA0A653DC82981".to_string() + ] + ); + assert_eq!( + out.blobs + 
.blob_prefix + .iter() + .map(|v| v.name.clone()) + .collect::<Vec<String>>(), + ["dir1/dir2/", "dir1/dir21/"] + ); + } + + /// This case is copied from real environment for testing + /// quick-xml overlapped-lists features. By default, quick-xml + /// can't deserialize content with overlapped-lists. + /// + /// For example, this case list blobs in this way: + /// + /// ```xml + /// <Blobs> + /// <Blob>xxx</Blob> + /// <BlobPrefix>yyy</BlobPrefix> + /// <Blob>zzz</Blob> + /// </Blobs> + /// ``` + /// + /// If `overlapped-lists` feature not enabled, we will get error `duplicate field Blob`. + #[test] + fn test_parse_overlapped_lists() { + let bs = "<?xml version=\"1.0\" encoding=\"utf-8\"?><EnumerationResults ServiceEndpoint=\"https://test.blob.core.windows.net/\" ContainerName=\"test\"><Prefix>9f7075e1-84d0-45ca-8196-ab9b71a8ef97/x/</Prefix><Delimiter>/</Delimiter><Blobs><Blob><Name>9f7075e1-84d0-45ca-8196-ab9b71a8ef97/x/</Name><Properties><Creation-Time>Thu, 01 Sep 2022 07:26:49 GMT</Creation-Time><Last-Modified>Thu, 01 Sep 2022 07:26:49 GMT</Last-Modified><Etag>0x8DA8BEB55D0EA35</Etag><Content-Length>0</Content [...] 
+ + de::from_reader(Bytes::from(bs).reader()).expect("must success") + } +} diff --git a/core/src/services/azblob/lister.rs b/core/src/services/azblob/lister.rs index 96e15821e..ad78e655a 100644 --- a/core/src/services/azblob/lister.rs +++ b/core/src/services/azblob/lister.rs @@ -20,9 +20,8 @@ use std::sync::Arc; use async_trait::async_trait; use bytes::Buf; use quick_xml::de; -use serde::Deserialize; -use super::core::AzblobCore; +use super::core::{AzblobCore, ListBlobsOutput}; use super::error::parse_error; use crate::raw::*; use crate::*; @@ -62,9 +61,8 @@ impl oio::PageList for AzblobLister { let bs = resp.into_body().bytes().await?; - let output: Output = de::from_reader(bs.reader()).map_err(|e| { - Error::new(ErrorKind::Unexpected, "deserialize xml from response").set_source(e) - })?; + let output: ListBlobsOutput = + de::from_reader(bs.reader()).map_err(new_xml_deserialize_error)?; // Try our best to check whether this list is done. // @@ -111,231 +109,3 @@ impl oio::PageList for AzblobLister { Ok(()) } } - -#[derive(Default, Debug, Deserialize)] -#[serde(default, rename_all = "PascalCase")] -struct Output { - blobs: Blobs, - #[serde(rename = "NextMarker")] - next_marker: Option<String>, -} - -#[derive(Default, Debug, Deserialize)] -#[serde(default, rename_all = "PascalCase")] -struct Blobs { - blob: Vec<Blob>, - blob_prefix: Vec<BlobPrefix>, -} - -#[derive(Default, Debug, Deserialize)] -#[serde(default, rename_all = "PascalCase")] -struct BlobPrefix { - name: String, -} - -#[derive(Default, Debug, Deserialize)] -#[serde(default, rename_all = "PascalCase")] -struct Blob { - properties: Properties, - name: String, -} - -#[derive(Default, Debug, Deserialize)] -#[serde(default, rename_all = "PascalCase")] -struct Properties { - #[serde(rename = "Content-Length")] - content_length: u64, - #[serde(rename = "Last-Modified")] - last_modified: String, - #[serde(rename = "Content-MD5")] - content_md5: String, - #[serde(rename = "Content-Type")] - content_type: 
String, - etag: String, -} - -#[cfg(test)] -mod tests { - use bytes::Bytes; - - use super::*; - - #[test] - fn test_parse_xml() { - let bs = bytes::Bytes::from( - r#" - <?xml version="1.0" encoding="utf-8"?> - <EnumerationResults ServiceEndpoint="https://test.blob.core.windows.net/" ContainerName="myazurebucket"> - <Prefix>dir1/</Prefix> - <Delimiter>/</Delimiter> - <Blobs> - <Blob> - <Name>dir1/2f018bb5-466f-4af1-84fa-2b167374ee06</Name> - <Properties> - <Creation-Time>Sun, 20 Mar 2022 11:29:03 GMT</Creation-Time> - <Last-Modified>Sun, 20 Mar 2022 11:29:03 GMT</Last-Modified> - <Etag>0x8DA0A64D66790C3</Etag> - <Content-Length>3485277</Content-Length> - <Content-Type>application/octet-stream</Content-Type> - <Content-Encoding /> - <Content-Language /> - <Content-CRC64 /> - <Content-MD5>llJ/+jOlx5GdA1sL7SdKuw==</Content-MD5> - <Cache-Control /> - <Content-Disposition /> - <BlobType>BlockBlob</BlobType> - <AccessTier>Hot</AccessTier> - <AccessTierInferred>true</AccessTierInferred> - <LeaseStatus>unlocked</LeaseStatus> - <LeaseState>available</LeaseState> - <ServerEncrypted>true</ServerEncrypted> - </Properties> - <OrMetadata /> - </Blob> - <Blob> - <Name>dir1/5b9432b2-79c0-48d8-90c2-7d3e153826ed</Name> - <Properties> - <Creation-Time>Tue, 29 Mar 2022 01:54:07 GMT</Creation-Time> - <Last-Modified>Tue, 29 Mar 2022 01:54:07 GMT</Last-Modified> - <Etag>0x8DA112702D88FE4</Etag> - <Content-Length>2471869</Content-Length> - <Content-Type>application/octet-stream</Content-Type> - <Content-Encoding /> - <Content-Language /> - <Content-CRC64 /> - <Content-MD5>xmgUltSnopLSJOukgCHFtg==</Content-MD5> - <Cache-Control /> - <Content-Disposition /> - <BlobType>BlockBlob</BlobType> - <AccessTier>Hot</AccessTier> - <AccessTierInferred>true</AccessTierInferred> - <LeaseStatus>unlocked</LeaseStatus> - <LeaseState>available</LeaseState> - <ServerEncrypted>true</ServerEncrypted> - </Properties> - <OrMetadata /> - </Blob> - <Blob> - <Name>dir1/b2d96f8b-d467-40d1-bb11-4632dddbf5b5</Name> - 
<Properties> - <Creation-Time>Sun, 20 Mar 2022 11:31:57 GMT</Creation-Time> - <Last-Modified>Sun, 20 Mar 2022 11:31:57 GMT</Last-Modified> - <Etag>0x8DA0A653DC82981</Etag> - <Content-Length>1259677</Content-Length> - <Content-Type>application/octet-stream</Content-Type> - <Content-Encoding /> - <Content-Language /> - <Content-CRC64 /> - <Content-MD5>AxTiFXHwrXKaZC5b7ZRybw==</Content-MD5> - <Cache-Control /> - <Content-Disposition /> - <BlobType>BlockBlob</BlobType> - <AccessTier>Hot</AccessTier> - <AccessTierInferred>true</AccessTierInferred> - <LeaseStatus>unlocked</LeaseStatus> - <LeaseState>available</LeaseState> - <ServerEncrypted>true</ServerEncrypted> - </Properties> - <OrMetadata /> - </Blob> - <BlobPrefix> - <Name>dir1/dir2/</Name> - </BlobPrefix> - <BlobPrefix> - <Name>dir1/dir21/</Name> - </BlobPrefix> - </Blobs> - <NextMarker /> - </EnumerationResults>"#, - ); - let out: Output = de::from_reader(bs.reader()).expect("must success"); - println!("{out:?}"); - - assert_eq!( - out.blobs - .blob - .iter() - .map(|v| v.name.clone()) - .collect::<Vec<String>>(), - [ - "dir1/2f018bb5-466f-4af1-84fa-2b167374ee06", - "dir1/5b9432b2-79c0-48d8-90c2-7d3e153826ed", - "dir1/b2d96f8b-d467-40d1-bb11-4632dddbf5b5" - ] - ); - assert_eq!( - out.blobs - .blob - .iter() - .map(|v| v.properties.content_length) - .collect::<Vec<u64>>(), - [3485277, 2471869, 1259677] - ); - assert_eq!( - out.blobs - .blob - .iter() - .map(|v| v.properties.content_md5.clone()) - .collect::<Vec<String>>(), - [ - "llJ/+jOlx5GdA1sL7SdKuw==".to_string(), - "xmgUltSnopLSJOukgCHFtg==".to_string(), - "AxTiFXHwrXKaZC5b7ZRybw==".to_string() - ] - ); - assert_eq!( - out.blobs - .blob - .iter() - .map(|v| v.properties.last_modified.clone()) - .collect::<Vec<String>>(), - [ - "Sun, 20 Mar 2022 11:29:03 GMT".to_string(), - "Tue, 29 Mar 2022 01:54:07 GMT".to_string(), - "Sun, 20 Mar 2022 11:31:57 GMT".to_string() - ] - ); - assert_eq!( - out.blobs - .blob - .iter() - .map(|v| v.properties.etag.clone()) - 
.collect::<Vec<String>>(), - [ - "0x8DA0A64D66790C3".to_string(), - "0x8DA112702D88FE4".to_string(), - "0x8DA0A653DC82981".to_string() - ] - ); - assert_eq!( - out.blobs - .blob_prefix - .iter() - .map(|v| v.name.clone()) - .collect::<Vec<String>>(), - ["dir1/dir2/", "dir1/dir21/"] - ); - } - - /// This case is copied from real environment for testing - /// quick-xml overlapped-lists features. By default, quick-xml - /// can't deserialize content with overlapped-lists. - /// - /// For example, this case list blobs in this way: - /// - /// ```xml - /// <Blobs> - /// <Blob>xxx</Blob> - /// <BlobPrefix>yyy</BlobPrefix> - /// <Blob>zzz</Blob> - /// </Blobs> - /// ``` - /// - /// If `overlapped-lists` feature not enabled, we will get error `duplicate field Blob`. - #[test] - fn test_parse_overlapped_lists() { - let bs = "<?xml version=\"1.0\" encoding=\"utf-8\"?><EnumerationResults ServiceEndpoint=\"https://test.blob.core.windows.net/\" ContainerName=\"test\"><Prefix>9f7075e1-84d0-45ca-8196-ab9b71a8ef97/x/</Prefix><Delimiter>/</Delimiter><Blobs><Blob><Name>9f7075e1-84d0-45ca-8196-ab9b71a8ef97/x/</Name><Properties><Creation-Time>Thu, 01 Sep 2022 07:26:49 GMT</Creation-Time><Last-Modified>Thu, 01 Sep 2022 07:26:49 GMT</Last-Modified><Etag>0x8DA8BEB55D0EA35</Etag><Content-Length>0</Content [...] 
- - de::from_reader(Bytes::from(bs).reader()).expect("must success") - } -} diff --git a/core/src/services/s3/backend.rs b/core/src/services/s3/backend.rs index bd87d726e..18e0b96be 100644 --- a/core/src/services/s3/backend.rs +++ b/core/src/services/s3/backend.rs @@ -1108,23 +1108,21 @@ impl Accessor for S3Backend { .s3_list_objects(path, "", "", Some(1), None) .await?; - let status = resp.status(); - - return match status { - StatusCode::OK => { - let bs = resp.into_body().bytes().await?; - let output: Output = quick_xml::de::from_reader(bs.reader()) - .map_err(new_xml_deserialize_error)?; - if !output.contents.is_empty() { - Ok(RpStat::new(Metadata::new(EntryMode::DIR))) - } else { - Err( - Error::new(ErrorKind::NotFound, "The directory is not found") - .with_context("path", path), - ) - } - } - _ => Err(parse_error(resp).await?), + if resp.status() != StatusCode::OK { + return Err(parse_error(resp).await?); + } + + let bs = resp.into_body().bytes().await?; + let output: Output = + quick_xml::de::from_reader(bs.reader()).map_err(new_xml_deserialize_error)?; + + return if !output.contents.is_empty() { + Ok(RpStat::new(Metadata::new(EntryMode::DIR))) + } else { + Err( + Error::new(ErrorKind::NotFound, "The directory is not found") + .with_context("path", path), + ) }; }
