This is an automated email from the ASF dual-hosted git repository.
xuanwo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opendal.git
The following commit(s) were added to refs/heads/main by this push:
new 7f9bef278 feat: implement content-md5 for s3 (#6508)
7f9bef278 is described below
commit 7f9bef278c8cb1e8cafd4237691151967ca0178a
Author: Ruihang Xia <[email protected]>
AuthorDate: Thu Oct 30 19:38:34 2025 +0800
feat: implement content-md5 for s3 (#6508)
* feat: implement content-md5 for s3
Signed-off-by: Ruihang Xia <[email protected]>
* move to checksum algo
Signed-off-by: Ruihang Xia <[email protected]>
* avoid copying content
Signed-off-by: Ruihang Xia <[email protected]>
---------
Signed-off-by: Ruihang Xia <[email protected]>
---
core/src/raw/http_util/header.rs | 15 +++++++++++++++
core/src/raw/http_util/mod.rs | 1 +
core/src/services/s3/backend.rs | 1 +
core/src/services/s3/core.rs | 11 +++++++++++
core/src/services/s3/writer.rs | 5 +++++
5 files changed, 33 insertions(+)
diff --git a/core/src/raw/http_util/header.rs b/core/src/raw/http_util/header.rs
index bc406f84b..32553ab06 100644
--- a/core/src/raw/http_util/header.rs
+++ b/core/src/raw/http_util/header.rs
@@ -221,6 +221,21 @@ pub fn format_content_md5(bs: &[u8]) -> String {
general_purpose::STANDARD.encode(hasher.finalize())
}
+/// Formats the content md5 header value from an iterator of byte chunks: the MD5 digest of all chunks, in iteration order, base64-encoded.
+pub fn format_content_md5_iter<I>(bs: I) -> String
+where
+ I: IntoIterator,
+ I::Item: AsRef<[u8]>,
+{
+ let mut hasher = md5::Md5::new();
+
+ for b in bs {
+ hasher.update(b.as_ref()); // feed each chunk into the running digest as it is yielded
+ }
+
+ general_purpose::STANDARD.encode(hasher.finalize()) // base64 (standard engine) of the finalized digest
+}
+
/// format authorization header by basic auth.
///
/// # Errors
diff --git a/core/src/raw/http_util/mod.rs b/core/src/raw/http_util/mod.rs
index f031ca68b..1c0aa8cf9 100644
--- a/core/src/raw/http_util/mod.rs
+++ b/core/src/raw/http_util/mod.rs
@@ -38,6 +38,7 @@ pub use header::build_header_value;
pub use header::format_authorization_by_basic;
pub use header::format_authorization_by_bearer;
pub use header::format_content_md5;
+pub use header::format_content_md5_iter;
pub use header::parse_content_disposition;
pub use header::parse_content_encoding;
pub use header::parse_content_length;
diff --git a/core/src/services/s3/backend.rs b/core/src/services/s3/backend.rs
index e772a5642..3bcbf27cb 100644
--- a/core/src/services/s3/backend.rs
+++ b/core/src/services/s3/backend.rs
@@ -771,6 +771,7 @@ impl Builder for S3Builder {
let checksum_algorithm = match
self.config.checksum_algorithm.as_deref() {
Some("crc32c") => Some(ChecksumAlgorithm::Crc32c),
+ Some("md5") => Some(ChecksumAlgorithm::Md5),
None => None,
v => {
return Err(Error::new(
diff --git a/core/src/services/s3/core.rs b/core/src/services/s3/core.rs
index 06232589a..9e63d5776 100644
--- a/core/src/services/s3/core.rs
+++ b/core/src/services/s3/core.rs
@@ -275,6 +275,7 @@ impl S3Core {
.for_each(|b| crc = crc32c::crc32c_append(crc, &b));
Some(BASE64_STANDARD.encode(crc.to_be_bytes()))
}
+ Some(ChecksumAlgorithm::Md5) =>
Some(format_content_md5_iter(body.clone())),
}
}
pub fn insert_checksum_header(
@@ -588,6 +589,12 @@ impl S3Core {
// Set SSE headers.
req = self.insert_sse_headers(req, true);
+ // Calculate Checksum.
+ if let Some(checksum) = self.calculate_checksum(&body) {
+ // Set Checksum header.
+ req = self.insert_checksum_header(req, &checksum);
+ }
+
// Inject operation to the request.
req = req.extension(Operation::Write);
@@ -1251,11 +1258,14 @@ pub struct ListObjectVersionsOutputDeleteMarker {
pub enum ChecksumAlgorithm {
Crc32c,
+ /// Maps to S3's `Content-MD5` header rather than an `x-amz-checksum-*` header.
+ Md5,
}
impl ChecksumAlgorithm {
pub fn to_header_name(&self) -> HeaderName { // returns the HTTP header name associated with this algorithm
match self {
Self::Crc32c => HeaderName::from_static("x-amz-checksum-crc32c"),
+ Self::Md5 => HeaderName::from_static("content-md5"), // MD5 uses `content-md5`, not an `x-amz-checksum-*` name
}
}
}
@@ -1266,6 +1276,7 @@ impl Display for ChecksumAlgorithm {
"{}",
match self {
Self::Crc32c => "CRC32C",
+ Self::Md5 => "MD5",
}
)
}
diff --git a/core/src/services/s3/writer.rs b/core/src/services/s3/writer.rs
index 54a1b6e3f..c88ed61ea 100644
--- a/core/src/services/s3/writer.rs
+++ b/core/src/services/s3/writer.rs
@@ -172,6 +172,11 @@ impl oio::MultipartWrite for S3Writer {
etag: p.etag.clone(),
checksum_crc32c: p.checksum.clone(),
},
+ ChecksumAlgorithm::Md5 =>
CompleteMultipartUploadRequestPart {
+ part_number: p.part_number,
+ etag: p.etag.clone(),
+ ..Default::default()
+ },
},
})
.collect();