This is an automated email from the ASF dual-hosted git repository.
yufei pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new 3d9fc1dee1 [AWS] S3FileIO - Add Cross-Region Bucket Access (#11259)
3d9fc1dee1 is described below
commit 3d9fc1dee1228e742e22234369498ee16b19f5a2
Author: S N Munendra <[email protected]>
AuthorDate: Mon Oct 14 22:37:44 2024 +0530
[AWS] S3FileIO - Add Cross-Region Bucket Access (#11259)
---
.../iceberg/aws/s3/TestS3FileIOIntegration.java | 29 +++++++++++++++++++++-
.../apache/iceberg/aws/s3/S3FileIOProperties.java | 22 +++++++++++++++-
.../iceberg/aws/s3/TestS3FileIOProperties.java | 11 ++++++++
docs/docs/aws.md | 16 ++++++++++++
4 files changed, 76 insertions(+), 2 deletions(-)
diff --git
a/aws/src/integration/java/org/apache/iceberg/aws/s3/TestS3FileIOIntegration.java
b/aws/src/integration/java/org/apache/iceberg/aws/s3/TestS3FileIOIntegration.java
index 388260a546..41a07401a1 100644
---
a/aws/src/integration/java/org/apache/iceberg/aws/s3/TestS3FileIOIntegration.java
+++
b/aws/src/integration/java/org/apache/iceberg/aws/s3/TestS3FileIOIntegration.java
@@ -182,6 +182,29 @@ public class TestS3FileIOIntegration {
validateRead(s3FileIO);
}
+ @Test
+ public void testCrossRegionAccessEnabled() throws Exception {
+ clientFactory.initialize(
+ ImmutableMap.of(S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED,
"true"));
+ S3Client s3Client = clientFactory.s3();
+ String crossBucketObjectKey = String.format("%s/%s", prefix,
UUID.randomUUID());
+ String crossBucketObjectUri =
+ String.format("s3://%s/%s", crossRegionBucketName,
crossBucketObjectKey);
+ try {
+ s3Client.putObject(
+ PutObjectRequest.builder()
+ .bucket(crossRegionBucketName)
+ .key(crossBucketObjectKey)
+ .build(),
+ RequestBody.fromBytes(contentBytes));
+ // make a copy in cross-region bucket
+ S3FileIO s3FileIO = new S3FileIO(clientFactory::s3);
+ validateRead(s3FileIO, crossBucketObjectUri);
+ } finally {
+ AwsIntegTestUtil.cleanS3Bucket(s3Client, crossRegionBucketName,
crossBucketObjectKey);
+ }
+ }
+
@Test
public void testNewInputStreamWithCrossRegionAccessPoint() throws Exception {
clientFactory.initialize(ImmutableMap.of(S3FileIOProperties.USE_ARN_REGION_ENABLED,
"true"));
@@ -550,7 +573,11 @@ public class TestS3FileIOIntegration {
}
private void validateRead(S3FileIO s3FileIO) throws Exception {
- InputFile file = s3FileIO.newInputFile(objectUri);
+ validateRead(s3FileIO, objectUri);
+ }
+
+ private void validateRead(S3FileIO s3FileIO, String s3Uri) throws Exception {
+ InputFile file = s3FileIO.newInputFile(s3Uri);
assertThat(file.getLength()).isEqualTo(contentBytes.length);
try (InputStream stream = file.newStream()) {
String result = IoUtils.toUtf8String(stream);
diff --git
a/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIOProperties.java
b/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIOProperties.java
index 4ab1514a22..3a43880f31 100644
--- a/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIOProperties.java
+++ b/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIOProperties.java
@@ -376,6 +376,16 @@ public class S3FileIOProperties implements Serializable {
public static final boolean DUALSTACK_ENABLED_DEFAULT = false;
+ /**
+ * Determines whether the S3 client allows Cross-Region bucket access; defaults to
false.
+ *
+ * <p>For more details, see
+ *
https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/s3-cross-region.html
+ */
+ public static final String CROSS_REGION_ACCESS_ENABLED =
"s3.cross-region-access-enabled";
+
+ public static final boolean CROSS_REGION_ACCESS_ENABLED_DEFAULT = false;
+
/**
* Used by {@link S3FileIO}, prefix used for bucket access point
configuration. To set, we can
* pass a catalog property.
@@ -442,6 +452,7 @@ public class S3FileIOProperties implements Serializable {
private final Map<String, String> bucketToAccessPointMapping;
private boolean isPreloadClientEnabled;
private final boolean isDualStackEnabled;
+ private final boolean isCrossRegionAccessEnabled;
private final boolean isPathStyleAccess;
private final boolean isUseArnRegionEnabled;
private final boolean isAccelerationEnabled;
@@ -477,6 +488,7 @@ public class S3FileIOProperties implements Serializable {
this.bucketToAccessPointMapping = Collections.emptyMap();
this.isPreloadClientEnabled = PRELOAD_CLIENT_ENABLED_DEFAULT;
this.isDualStackEnabled = DUALSTACK_ENABLED_DEFAULT;
+ this.isCrossRegionAccessEnabled = CROSS_REGION_ACCESS_ENABLED_DEFAULT;
this.isPathStyleAccess = PATH_STYLE_ACCESS_DEFAULT;
this.isUseArnRegionEnabled = USE_ARN_REGION_ENABLED_DEFAULT;
this.isAccelerationEnabled = ACCELERATION_ENABLED_DEFAULT;
@@ -521,6 +533,9 @@ public class S3FileIOProperties implements Serializable {
properties, ACCELERATION_ENABLED, ACCELERATION_ENABLED_DEFAULT);
this.isDualStackEnabled =
PropertyUtil.propertyAsBoolean(properties, DUALSTACK_ENABLED,
DUALSTACK_ENABLED_DEFAULT);
+ this.isCrossRegionAccessEnabled =
+ PropertyUtil.propertyAsBoolean(
+ properties, CROSS_REGION_ACCESS_ENABLED,
CROSS_REGION_ACCESS_ENABLED_DEFAULT);
try {
this.multiPartSize =
PropertyUtil.propertyAsInt(properties, MULTIPART_SIZE,
MULTIPART_SIZE_DEFAULT);
@@ -680,6 +695,10 @@ public class S3FileIOProperties implements Serializable {
return this.isDualStackEnabled;
}
+ public boolean isCrossRegionAccessEnabled() {
+ return this.isCrossRegionAccessEnabled;
+ }
+
public boolean isPathStyleAccess() {
return this.isPathStyleAccess;
}
@@ -832,7 +851,7 @@ public class S3FileIOProperties implements Serializable {
/**
* Configure services settings for an S3 client. The settings include:
s3DualStack,
- * s3UseArnRegion, s3PathStyleAccess, and s3Acceleration
+ * crossRegionAccessEnabled, s3UseArnRegion, s3PathStyleAccess, and
s3Acceleration
*
* <p>Sample usage:
*
@@ -843,6 +862,7 @@ public class S3FileIOProperties implements Serializable {
public <T extends S3ClientBuilder> void applyServiceConfigurations(T
builder) {
builder
.dualstackEnabled(isDualStackEnabled)
+ .crossRegionAccessEnabled(isCrossRegionAccessEnabled)
.serviceConfiguration(
S3Configuration.builder()
.pathStyleAccessEnabled(isPathStyleAccess)
diff --git
a/aws/src/test/java/org/apache/iceberg/aws/s3/TestS3FileIOProperties.java
b/aws/src/test/java/org/apache/iceberg/aws/s3/TestS3FileIOProperties.java
index a61b9efb9f..71b931257c 100644
--- a/aws/src/test/java/org/apache/iceberg/aws/s3/TestS3FileIOProperties.java
+++ b/aws/src/test/java/org/apache/iceberg/aws/s3/TestS3FileIOProperties.java
@@ -72,6 +72,9 @@ public class TestS3FileIOProperties {
assertThat(S3FileIOProperties.DUALSTACK_ENABLED_DEFAULT)
.isEqualTo(s3FileIOProperties.isDualStackEnabled());
+ assertThat(S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED_DEFAULT)
+ .isEqualTo(s3FileIOProperties.isCrossRegionAccessEnabled());
+
assertThat(S3FileIOProperties.PATH_STYLE_ACCESS_DEFAULT)
.isEqualTo(s3FileIOProperties.isPathStyleAccess());
@@ -155,6 +158,11 @@ public class TestS3FileIOProperties {
S3FileIOProperties.DUALSTACK_ENABLED,
String.valueOf(s3FileIOProperties.isDualStackEnabled()));
+ assertThat(map)
+ .containsEntry(
+ S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED,
+ String.valueOf(s3FileIOProperties.isCrossRegionAccessEnabled()));
+
assertThat(map)
.containsEntry(
S3FileIOProperties.PATH_STYLE_ACCESS,
@@ -382,6 +390,7 @@ public class TestS3FileIOProperties {
map.put(S3FileIOProperties.USE_ARN_REGION_ENABLED, "true");
map.put(S3FileIOProperties.ACCELERATION_ENABLED, "true");
map.put(S3FileIOProperties.DUALSTACK_ENABLED, "true");
+ map.put(S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED, "true");
map.put(
S3FileIOProperties.MULTIPART_SIZE,
String.valueOf(S3FileIOProperties.MULTIPART_SIZE_DEFAULT));
@@ -427,6 +436,7 @@ public class TestS3FileIOProperties {
public void testApplyS3ServiceConfigurations() {
Map<String, String> properties = Maps.newHashMap();
properties.put(S3FileIOProperties.DUALSTACK_ENABLED, "true");
+ properties.put(S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED, "true");
properties.put(S3FileIOProperties.PATH_STYLE_ACCESS, "true");
properties.put(S3FileIOProperties.USE_ARN_REGION_ENABLED, "true");
// acceleration enabled has to be set to false if path style is true
@@ -438,6 +448,7 @@ public class TestS3FileIOProperties {
ArgumentCaptor.forClass(S3Configuration.class);
Mockito.doReturn(mockA).when(mockA).dualstackEnabled(Mockito.anyBoolean());
+
Mockito.doReturn(mockA).when(mockA).crossRegionAccessEnabled(Mockito.anyBoolean());
Mockito.doReturn(mockA).when(mockA).serviceConfiguration(Mockito.any(S3Configuration.class));
s3FileIOProperties.applyServiceConfigurations(mockA);
diff --git a/docs/docs/aws.md b/docs/docs/aws.md
index 5a166c0c91..e408cb5a2a 100644
--- a/docs/docs/aws.md
+++ b/docs/docs/aws.md
@@ -514,6 +514,22 @@ spark-sql --conf
spark.sql.catalog.my_catalog=org.apache.iceberg.spark.SparkCata
For more details on using S3 Access Grants, please refer to [Managing access
with S3 Access
Grants](https://docs.aws.amazon.com/AmazonS3/latest/userguide/access-grants.html).
+### S3 Cross-Region Access
+
+S3 Cross-Region bucket access can be turned on by setting the catalog property
`s3.cross-region-access-enabled` to `true`.
+It is turned off by default to avoid increased latency on the first S3 API call.
+
+For example, to enable S3 Cross-Region bucket access with Spark 3.3, you can
start the Spark SQL shell with:
+```
+spark-sql --conf
spark.sql.catalog.my_catalog=org.apache.iceberg.spark.SparkCatalog \
+ --conf
spark.sql.catalog.my_catalog.warehouse=s3://my-bucket2/my/key/prefix \
+ --conf spark.sql.catalog.my_catalog.type=glue \
+ --conf
spark.sql.catalog.my_catalog.io-impl=org.apache.iceberg.aws.s3.S3FileIO \
+ --conf spark.sql.catalog.my_catalog.s3.cross-region-access-enabled=true
+```
+
+For more details, please refer to [Cross-Region access for Amazon
S3](https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/s3-cross-region.html).
+
### S3 Acceleration
[S3 Acceleration](https://aws.amazon.com/s3/transfer-acceleration/) can be
used to speed up transfers to and from Amazon S3 by as much as 50-500% for
long-distance transfer of larger objects.