This is an automated email from the ASF dual-hosted git repository.

xuanwo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opendal.git


The following commit(s) were added to refs/heads/main by this push:
     new 7f9bef278 feat: implement content-md5 for s3 (#6508)
7f9bef278 is described below

commit 7f9bef278c8cb1e8cafd4237691151967ca0178a
Author: Ruihang Xia <[email protected]>
AuthorDate: Thu Oct 30 19:38:34 2025 +0800

    feat: implement content-md5 for s3 (#6508)
    
    * feat: implement content-md5 for s3
    
    Signed-off-by: Ruihang Xia <[email protected]>
    
    * move to checksum algo
    
    Signed-off-by: Ruihang Xia <[email protected]>
    
    * avoid copying content
    
    Signed-off-by: Ruihang Xia <[email protected]>
    
    ---------
    
    Signed-off-by: Ruihang Xia <[email protected]>
---
 core/src/raw/http_util/header.rs | 15 +++++++++++++++
 core/src/raw/http_util/mod.rs    |  1 +
 core/src/services/s3/backend.rs  |  1 +
 core/src/services/s3/core.rs     | 11 +++++++++++
 core/src/services/s3/writer.rs   |  5 +++++
 5 files changed, 33 insertions(+)

diff --git a/core/src/raw/http_util/header.rs b/core/src/raw/http_util/header.rs
index bc406f84b..32553ab06 100644
--- a/core/src/raw/http_util/header.rs
+++ b/core/src/raw/http_util/header.rs
@@ -221,6 +221,21 @@ pub fn format_content_md5(bs: &[u8]) -> String {
     general_purpose::STANDARD.encode(hasher.finalize())
 }
 
+/// format content md5 header by given iter of bytes.
+pub fn format_content_md5_iter<I>(bs: I) -> String
+where
+    I: IntoIterator,
+    I::Item: AsRef<[u8]>,
+{
+    let mut hasher = md5::Md5::new();
+
+    for b in bs {
+        hasher.update(b.as_ref());
+    }
+
+    general_purpose::STANDARD.encode(hasher.finalize())
+}
+
 /// format authorization header by basic auth.
 ///
 /// # Errors
diff --git a/core/src/raw/http_util/mod.rs b/core/src/raw/http_util/mod.rs
index f031ca68b..1c0aa8cf9 100644
--- a/core/src/raw/http_util/mod.rs
+++ b/core/src/raw/http_util/mod.rs
@@ -38,6 +38,7 @@ pub use header::build_header_value;
 pub use header::format_authorization_by_basic;
 pub use header::format_authorization_by_bearer;
 pub use header::format_content_md5;
+pub use header::format_content_md5_iter;
 pub use header::parse_content_disposition;
 pub use header::parse_content_encoding;
 pub use header::parse_content_length;
diff --git a/core/src/services/s3/backend.rs b/core/src/services/s3/backend.rs
index e772a5642..3bcbf27cb 100644
--- a/core/src/services/s3/backend.rs
+++ b/core/src/services/s3/backend.rs
@@ -771,6 +771,7 @@ impl Builder for S3Builder {
 
         let checksum_algorithm = match self.config.checksum_algorithm.as_deref() {
             Some("crc32c") => Some(ChecksumAlgorithm::Crc32c),
+            Some("md5") => Some(ChecksumAlgorithm::Md5),
             None => None,
             v => {
                 return Err(Error::new(
diff --git a/core/src/services/s3/core.rs b/core/src/services/s3/core.rs
index 06232589a..9e63d5776 100644
--- a/core/src/services/s3/core.rs
+++ b/core/src/services/s3/core.rs
@@ -275,6 +275,7 @@ impl S3Core {
                     .for_each(|b| crc = crc32c::crc32c_append(crc, &b));
                 Some(BASE64_STANDARD.encode(crc.to_be_bytes()))
             }
+            Some(ChecksumAlgorithm::Md5) => Some(format_content_md5_iter(body.clone())),
         }
     }
     pub fn insert_checksum_header(
@@ -588,6 +589,12 @@ impl S3Core {
         // Set SSE headers.
         req = self.insert_sse_headers(req, true);
 
+        // Calculate Checksum.
+        if let Some(checksum) = self.calculate_checksum(&body) {
+            // Set Checksum header.
+            req = self.insert_checksum_header(req, &checksum);
+        }
+
         // Inject operation to the request.
         req = req.extension(Operation::Write);
 
@@ -1251,11 +1258,14 @@ pub struct ListObjectVersionsOutputDeleteMarker {
 
 pub enum ChecksumAlgorithm {
     Crc32c,
+    /// Mapping to the `Content-MD5` header from S3.
+    Md5,
 }
 impl ChecksumAlgorithm {
     pub fn to_header_name(&self) -> HeaderName {
         match self {
             Self::Crc32c => HeaderName::from_static("x-amz-checksum-crc32c"),
+            Self::Md5 => HeaderName::from_static("content-md5"),
         }
     }
 }
@@ -1266,6 +1276,7 @@ impl Display for ChecksumAlgorithm {
             "{}",
             match self {
                 Self::Crc32c => "CRC32C",
+                Self::Md5 => "MD5",
             }
         )
     }
diff --git a/core/src/services/s3/writer.rs b/core/src/services/s3/writer.rs
index 54a1b6e3f..c88ed61ea 100644
--- a/core/src/services/s3/writer.rs
+++ b/core/src/services/s3/writer.rs
@@ -172,6 +172,11 @@ impl oio::MultipartWrite for S3Writer {
                         etag: p.etag.clone(),
                         checksum_crc32c: p.checksum.clone(),
                     },
+                    ChecksumAlgorithm::Md5 => CompleteMultipartUploadRequestPart {
+                        part_number: p.part_number,
+                        etag: p.etag.clone(),
+                        ..Default::default()
+                    },
                 },
             })
             .collect();

Reply via email to