This is an automated email from the ASF dual-hosted git repository.
pabloem pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new 6381cf4 [BEAM-11494][BEAM-11821] FileIO stops overwriting files on
retries (AWS SDK v2)
new ba2daad Merge pull request #15974 from [BEAM-11494][BEAM-11821]
FileIO stops overwriting files on retries (AWS SDK v2)
6381cf4 is described below
commit 6381cf476bd7c32ab5726f321239c789d1654564
Author: mosche <[email protected]>
AuthorDate: Mon Nov 15 15:14:56 2021 +0100
[BEAM-11494][BEAM-11821] FileIO stops overwriting files on retries (AWS SDK
v2)
---
.../apache/beam/sdk/io/aws2/s3/S3FileSystem.java | 24 ++++++++++++++--------
1 file changed, 15 insertions(+), 9 deletions(-)
diff --git a/sdks/java/io/amazon-web-services2/src/main/java/org/apache/beam/sdk/io/aws2/s3/S3FileSystem.java b/sdks/java/io/amazon-web-services2/src/main/java/org/apache/beam/sdk/io/aws2/s3/S3FileSystem.java
index 11a5215..d7f4ec4 100644
--- a/sdks/java/io/amazon-web-services2/src/main/java/org/apache/beam/sdk/io/aws2/s3/S3FileSystem.java
+++ b/sdks/java/io/amazon-web-services2/src/main/java/org/apache/beam/sdk/io/aws2/s3/S3FileSystem.java
@@ -229,9 +229,10 @@ class S3FileSystem extends FileSystem<S3ResourceId> {
exception = pathWithEncoding.getException();
break;
} else {
+ // TODO(BEAM-11821): Support file checksum in this method.
metadatas.add(
createBeamMetadata(
- pathWithEncoding.getPath(), pathWithEncoding.getContentEncoding()));
+ pathWithEncoding.getPath(), pathWithEncoding.getContentEncoding(), null));
}
}
@@ -389,21 +390,26 @@ class S3FileSystem extends FileSystem<S3ResourceId> {
createBeamMetadata(
path.withSize(s3ObjectHead.contentLength())
.withLastModified(Date.from(s3ObjectHead.lastModified())),
- Strings.nullToEmpty(s3ObjectHead.contentEncoding()))));
+ Strings.nullToEmpty(s3ObjectHead.contentEncoding()),
+ s3ObjectHead.eTag())));
}
private static MatchResult.Metadata createBeamMetadata(
- S3ResourceId path, String contentEncoding) {
+ S3ResourceId path, String contentEncoding, String eTag) {
checkArgument(path.getSize().isPresent(), "The resource id should have a size.");
checkNotNull(contentEncoding, "contentEncoding");
boolean isReadSeekEfficient =
!NON_READ_SEEK_EFFICIENT_ENCODINGS.contains(contentEncoding);
- return MatchResult.Metadata.builder()
- .setIsReadSeekEfficient(isReadSeekEfficient)
- .setResourceId(path)
- .setSizeBytes(path.getSize().get())
- .setLastModifiedMillis(path.getLastModified().transform(Date::getTime).or(0L))
- .build();
+ MatchResult.Metadata.Builder ret =
+ MatchResult.Metadata.builder()
+ .setIsReadSeekEfficient(isReadSeekEfficient)
+ .setResourceId(path)
+ .setSizeBytes(path.getSize().get())
+ .setLastModifiedMillis(path.getLastModified().transform(Date::getTime).or(0L));
+ if (eTag != null) {
+ ret.setChecksum(eTag);
+ }
+ return ret.build();
}
@Override