This is an automated email from the ASF dual-hosted git repository.

yufei pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/main by this push:
     new 3d9fc1dee1 [AWS] S3FileIO - Add Cross-Region Bucket Access (#11259)
3d9fc1dee1 is described below

commit 3d9fc1dee1228e742e22234369498ee16b19f5a2
Author: S N Munendra <[email protected]>
AuthorDate: Mon Oct 14 22:37:44 2024 +0530

    [AWS] S3FileIO - Add Cross-Region Bucket Access (#11259)
---
 .../iceberg/aws/s3/TestS3FileIOIntegration.java    | 29 +++++++++++++++++++++-
 .../apache/iceberg/aws/s3/S3FileIOProperties.java  | 22 +++++++++++++++-
 .../iceberg/aws/s3/TestS3FileIOProperties.java     | 11 ++++++++
 docs/docs/aws.md                                   | 16 ++++++++++++
 4 files changed, 76 insertions(+), 2 deletions(-)

diff --git 
a/aws/src/integration/java/org/apache/iceberg/aws/s3/TestS3FileIOIntegration.java
 
b/aws/src/integration/java/org/apache/iceberg/aws/s3/TestS3FileIOIntegration.java
index 388260a546..41a07401a1 100644
--- 
a/aws/src/integration/java/org/apache/iceberg/aws/s3/TestS3FileIOIntegration.java
+++ 
b/aws/src/integration/java/org/apache/iceberg/aws/s3/TestS3FileIOIntegration.java
@@ -182,6 +182,29 @@ public class TestS3FileIOIntegration {
     validateRead(s3FileIO);
   }
 
+  @Test
+  public void testCrossRegionAccessEnabled() throws Exception {
+    clientFactory.initialize(
+        ImmutableMap.of(S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED, 
"true"));
+    S3Client s3Client = clientFactory.s3();
+    String crossBucketObjectKey = String.format("%s/%s", prefix, 
UUID.randomUUID());
+    String crossBucketObjectUri =
+        String.format("s3://%s/%s", crossRegionBucketName, 
crossBucketObjectKey);
+    try {
+      s3Client.putObject(
+          PutObjectRequest.builder()
+              .bucket(crossRegionBucketName)
+              .key(crossBucketObjectKey)
+              .build(),
+          RequestBody.fromBytes(contentBytes));
+      // make a copy in cross-region bucket
+      S3FileIO s3FileIO = new S3FileIO(clientFactory::s3);
+      validateRead(s3FileIO, crossBucketObjectUri);
+    } finally {
+      AwsIntegTestUtil.cleanS3Bucket(s3Client, crossRegionBucketName, 
crossBucketObjectKey);
+    }
+  }
+
   @Test
   public void testNewInputStreamWithCrossRegionAccessPoint() throws Exception {
     
clientFactory.initialize(ImmutableMap.of(S3FileIOProperties.USE_ARN_REGION_ENABLED,
 "true"));
@@ -550,7 +573,11 @@ public class TestS3FileIOIntegration {
   }
 
   private void validateRead(S3FileIO s3FileIO) throws Exception {
-    InputFile file = s3FileIO.newInputFile(objectUri);
+    validateRead(s3FileIO, objectUri);
+  }
+
+  private void validateRead(S3FileIO s3FileIO, String s3Uri) throws Exception {
+    InputFile file = s3FileIO.newInputFile(s3Uri);
     assertThat(file.getLength()).isEqualTo(contentBytes.length);
     try (InputStream stream = file.newStream()) {
       String result = IoUtils.toUtf8String(stream);
diff --git 
a/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIOProperties.java 
b/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIOProperties.java
index 4ab1514a22..3a43880f31 100644
--- a/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIOProperties.java
+++ b/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIOProperties.java
@@ -376,6 +376,16 @@ public class S3FileIOProperties implements Serializable {
 
   public static final boolean DUALSTACK_ENABLED_DEFAULT = false;
 
+  /**
+   * Determines if S3 client will allow Cross-Region bucket access, defaults to 
false.
+   *
+   * <p>For more details, see
+   * 
https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/s3-cross-region.html
+   */
+  public static final String CROSS_REGION_ACCESS_ENABLED = 
"s3.cross-region-access-enabled";
+
+  public static final boolean CROSS_REGION_ACCESS_ENABLED_DEFAULT = false;
+
   /**
    * Used by {@link S3FileIO}, prefix used for bucket access point 
configuration. To set, we can
    * pass a catalog property.
@@ -442,6 +452,7 @@ public class S3FileIOProperties implements Serializable {
   private final Map<String, String> bucketToAccessPointMapping;
   private boolean isPreloadClientEnabled;
   private final boolean isDualStackEnabled;
+  private final boolean isCrossRegionAccessEnabled;
   private final boolean isPathStyleAccess;
   private final boolean isUseArnRegionEnabled;
   private final boolean isAccelerationEnabled;
@@ -477,6 +488,7 @@ public class S3FileIOProperties implements Serializable {
     this.bucketToAccessPointMapping = Collections.emptyMap();
     this.isPreloadClientEnabled = PRELOAD_CLIENT_ENABLED_DEFAULT;
     this.isDualStackEnabled = DUALSTACK_ENABLED_DEFAULT;
+    this.isCrossRegionAccessEnabled = CROSS_REGION_ACCESS_ENABLED_DEFAULT;
     this.isPathStyleAccess = PATH_STYLE_ACCESS_DEFAULT;
     this.isUseArnRegionEnabled = USE_ARN_REGION_ENABLED_DEFAULT;
     this.isAccelerationEnabled = ACCELERATION_ENABLED_DEFAULT;
@@ -521,6 +533,9 @@ public class S3FileIOProperties implements Serializable {
             properties, ACCELERATION_ENABLED, ACCELERATION_ENABLED_DEFAULT);
     this.isDualStackEnabled =
         PropertyUtil.propertyAsBoolean(properties, DUALSTACK_ENABLED, 
DUALSTACK_ENABLED_DEFAULT);
+    this.isCrossRegionAccessEnabled =
+        PropertyUtil.propertyAsBoolean(
+            properties, CROSS_REGION_ACCESS_ENABLED, 
CROSS_REGION_ACCESS_ENABLED_DEFAULT);
     try {
       this.multiPartSize =
           PropertyUtil.propertyAsInt(properties, MULTIPART_SIZE, 
MULTIPART_SIZE_DEFAULT);
@@ -680,6 +695,10 @@ public class S3FileIOProperties implements Serializable {
     return this.isDualStackEnabled;
   }
 
+  public boolean isCrossRegionAccessEnabled() {
+    return this.isCrossRegionAccessEnabled;
+  }
+
   public boolean isPathStyleAccess() {
     return this.isPathStyleAccess;
   }
@@ -832,7 +851,7 @@ public class S3FileIOProperties implements Serializable {
 
   /**
    * Configure services settings for an S3 client. The settings include: 
s3DualStack,
-   * s3UseArnRegion, s3PathStyleAccess, and s3Acceleration
+   * crossRegionAccessEnabled, s3UseArnRegion, s3PathStyleAccess, and 
s3Acceleration
    *
    * <p>Sample usage:
    *
@@ -843,6 +862,7 @@ public class S3FileIOProperties implements Serializable {
   public <T extends S3ClientBuilder> void applyServiceConfigurations(T 
builder) {
     builder
         .dualstackEnabled(isDualStackEnabled)
+        .crossRegionAccessEnabled(isCrossRegionAccessEnabled)
         .serviceConfiguration(
             S3Configuration.builder()
                 .pathStyleAccessEnabled(isPathStyleAccess)
diff --git 
a/aws/src/test/java/org/apache/iceberg/aws/s3/TestS3FileIOProperties.java 
b/aws/src/test/java/org/apache/iceberg/aws/s3/TestS3FileIOProperties.java
index a61b9efb9f..71b931257c 100644
--- a/aws/src/test/java/org/apache/iceberg/aws/s3/TestS3FileIOProperties.java
+++ b/aws/src/test/java/org/apache/iceberg/aws/s3/TestS3FileIOProperties.java
@@ -72,6 +72,9 @@ public class TestS3FileIOProperties {
     assertThat(S3FileIOProperties.DUALSTACK_ENABLED_DEFAULT)
         .isEqualTo(s3FileIOProperties.isDualStackEnabled());
 
+    assertThat(S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED_DEFAULT)
+        .isEqualTo(s3FileIOProperties.isCrossRegionAccessEnabled());
+
     assertThat(S3FileIOProperties.PATH_STYLE_ACCESS_DEFAULT)
         .isEqualTo(s3FileIOProperties.isPathStyleAccess());
 
@@ -155,6 +158,11 @@ public class TestS3FileIOProperties {
             S3FileIOProperties.DUALSTACK_ENABLED,
             String.valueOf(s3FileIOProperties.isDualStackEnabled()));
 
+    assertThat(map)
+        .containsEntry(
+            S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED,
+            String.valueOf(s3FileIOProperties.isCrossRegionAccessEnabled()));
+
     assertThat(map)
         .containsEntry(
             S3FileIOProperties.PATH_STYLE_ACCESS,
@@ -382,6 +390,7 @@ public class TestS3FileIOProperties {
     map.put(S3FileIOProperties.USE_ARN_REGION_ENABLED, "true");
     map.put(S3FileIOProperties.ACCELERATION_ENABLED, "true");
     map.put(S3FileIOProperties.DUALSTACK_ENABLED, "true");
+    map.put(S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED, "true");
     map.put(
         S3FileIOProperties.MULTIPART_SIZE,
         String.valueOf(S3FileIOProperties.MULTIPART_SIZE_DEFAULT));
@@ -427,6 +436,7 @@ public class TestS3FileIOProperties {
   public void testApplyS3ServiceConfigurations() {
     Map<String, String> properties = Maps.newHashMap();
     properties.put(S3FileIOProperties.DUALSTACK_ENABLED, "true");
+    properties.put(S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED, "true");
     properties.put(S3FileIOProperties.PATH_STYLE_ACCESS, "true");
     properties.put(S3FileIOProperties.USE_ARN_REGION_ENABLED, "true");
     // acceleration enabled has to be set to false if path style is true
@@ -438,6 +448,7 @@ public class TestS3FileIOProperties {
         ArgumentCaptor.forClass(S3Configuration.class);
 
     Mockito.doReturn(mockA).when(mockA).dualstackEnabled(Mockito.anyBoolean());
+    
Mockito.doReturn(mockA).when(mockA).crossRegionAccessEnabled(Mockito.anyBoolean());
     
Mockito.doReturn(mockA).when(mockA).serviceConfiguration(Mockito.any(S3Configuration.class));
 
     s3FileIOProperties.applyServiceConfigurations(mockA);
diff --git a/docs/docs/aws.md b/docs/docs/aws.md
index 5a166c0c91..e408cb5a2a 100644
--- a/docs/docs/aws.md
+++ b/docs/docs/aws.md
@@ -514,6 +514,22 @@ spark-sql --conf 
spark.sql.catalog.my_catalog=org.apache.iceberg.spark.SparkCata
 
 For more details on using S3 Access Grants, please refer to [Managing access 
with S3 Access 
Grants](https://docs.aws.amazon.com/AmazonS3/latest/userguide/access-grants.html).
 
+### S3 Cross-Region Access
+
+S3 Cross-Region bucket access can be turned on by setting catalog property 
`s3.cross-region-access-enabled` to `true`. 
+This is turned off by default to avoid increased latency on the first S3 API call.
+
+For example, to enable S3 Cross-Region bucket access with Spark 3.3, you can 
start the Spark SQL shell with:
+```
+spark-sql --conf 
spark.sql.catalog.my_catalog=org.apache.iceberg.spark.SparkCatalog \
+    --conf 
spark.sql.catalog.my_catalog.warehouse=s3://my-bucket2/my/key/prefix \
+    --conf spark.sql.catalog.my_catalog.type=glue \
+    --conf 
spark.sql.catalog.my_catalog.io-impl=org.apache.iceberg.aws.s3.S3FileIO \
+    --conf spark.sql.catalog.my_catalog.s3.cross-region-access-enabled=true
+```
+
+For more details, please refer to [Cross-Region access for Amazon 
S3](https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/s3-cross-region.html).
+
 ### S3 Acceleration
 
 [S3 Acceleration](https://aws.amazon.com/s3/transfer-acceleration/) can be 
used to speed up transfers to and from Amazon S3 by as much as 50-500% for 
long-distance transfer of larger objects.

Reply via email to