Muon commented on code in PR #562:
URL: https://github.com/apache/arrow-rs-object-store/pull/562#discussion_r2590924349
##########
src/aws/mod.rs:
##########
@@ -101,6 +103,56 @@ impl AmazonS3 {
    fn path_url(&self, path: &Path) -> String {
        self.client.config.path_url(path)
    }
+
+    /// Perform a multipart copy operation
+    async fn copy_multipart(
+        &self,
+        from: &Path,
+        to: &Path,
+        size: u64,
+        mode: CompleteMultipartMode,
+    ) -> Result<()> {
+        // Perform multipart copy using UploadPartCopy
+        let upload_id = self
+            .client
+            .create_multipart(to, PutMultipartOptions::default())
+            .await?;
+
+        // S3 requires minimum 5 MiB per part (except final) and max 10,000 parts
+        let part_size = self.client.config.multipart_copy_part_size;
+
+        let mut parts = Vec::new();
+        let mut offset: u64 = 0;
+        let mut idx: usize = 0;
+        let res = async {
+            while offset < size {
+                let end = std::cmp::min(offset + part_size, size);
+                let payload = if offset == 0 && end == size {
+                    PutPartPayload::Copy(from)
+                } else {
+                    PutPartPayload::CopyRange(from, offset..end)
+                };
+                let part = self.client.put_part(to, &upload_id, idx, payload).await?;
+                parts.push(part);
+                idx += 1;
+                offset = end;
+            }
+            self.client
+                .complete_multipart(to, &upload_id, parts, mode)
+                .await
+                .map(|_| ())
+        }
+        .await;
+
+        // If the multipart upload failed, make a best effort attempt to
+        // clean it up. It's the caller's responsibility to add a
+        // lifecycle rule if guaranteed cleanup is required, as we
+        // cannot protect against an ill-timed process crash.
+        if res.is_err() {
+            let _ = self.client.abort_multipart(to, &upload_id).await;
+        }
Review Comment:
Goes in docstring?
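
   For illustration only, a minimal sketch of how the best-effort cleanup note could move into the doc comment on `copy_multipart`. The wording below is a suggestion based on the existing inline comment, not text from the PR:

       /// Perform a multipart copy operation using UploadPartCopy.
       ///
       /// If the copy fails partway through, a best-effort attempt is made to
       /// abort the multipart upload. Guaranteed cleanup is not possible (for
       /// example after an ill-timed process crash), so callers that require
       /// it should configure an S3 lifecycle rule to expire incomplete
       /// multipart uploads.
       async fn copy_multipart(
           &self,
           from: &Path,
           to: &Path,
           size: u64,
           mode: CompleteMultipartMode,
       ) -> Result<()> {
           // ...body unchanged from the diff above...
       }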
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]