This is an automated email from the ASF dual-hosted git repository.

stevel pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-3.4 by this push:
     new cf5800a9827 HADOOP-19286: S3A: Support cross region access when S3 region/endpoint is set (#7093) (#7067)
cf5800a9827 is described below

commit cf5800a9827540f7cec83d93d2861ac806bc885c
Author: Syed Shameerur Rahman <rhma...@amazon.com>
AuthorDate: Tue Oct 8 22:23:00 2024 +0530

    HADOOP-19286: S3A: Support cross region access when S3 region/endpoint is set (#7093) (#7067)
    
    
    Adds a new option
       fs.s3a.cross.region.access.enabled
    which is true by default.
    
    This makes cross region access a separate configuration option, enabling or
    disabling it irrespective of whether a region/endpoint is set.
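
    For example (illustrative only; the property name matches the new constant
    AWS_S3_CROSS_REGION_ACCESS_ENABLED added in Constants.java below), cross
    region access can be turned off in core-site.xml with:

        <property>
          <name>fs.s3a.cross.region.access.enabled</name>
          <value>false</value>
        </property>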
    
    This commit includes the (ADDENDUM) change (#7098)
    
    Contributed by Syed Shameerur Rahman
---
 .../java/org/apache/hadoop/fs/s3a/Constants.java   | 13 ++++++
 .../hadoop/fs/s3a/DefaultS3ClientFactory.java      | 18 ++++++--
 .../site/markdown/tools/hadoop-aws/connecting.md   | 10 ++++
 .../hadoop/fs/s3a/ITestS3AConfiguration.java       |  5 +-
 .../hadoop/fs/s3a/ITestS3AEndpointRegion.java      | 54 ++++++++++++++++++++++
 5 files changed, 93 insertions(+), 7 deletions(-)

diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
index cf7bc3ddcf2..eff42c1050f 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
@@ -1362,6 +1362,19 @@ public final class Constants {
    */
   public static final String XA_HEADER_PREFIX = "header.";
 
+  /**
+   * Is S3 cross region access enabled?
+   * Value: {@value}.
+   */
+
+  public static final String AWS_S3_CROSS_REGION_ACCESS_ENABLED =
+      "fs.s3a.cross.region.access.enabled";
+  /**
+   * Default value for S3 cross region access enabled: {@value}.
+   */
+  public static final boolean AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT = true;
+
+
   /**
    * AWS S3 region for the bucket. When set bypasses the construction of
    * region through endpoint url.
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java
index 4b3db999247..c9c3eee30ea 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java
@@ -55,6 +55,8 @@ import org.apache.hadoop.fs.s3a.statistics.impl.AwsStatisticsCollector;
 import org.apache.hadoop.fs.store.LogExactlyOnce;
 
 import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION;
+import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CROSS_REGION_ACCESS_ENABLED;
+import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT;
 import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_DEFAULT_REGION;
 import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT;
 import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT;
@@ -259,8 +261,10 @@ public class DefaultS3ClientFactory extends Configured
   * <li>If endpoint is configured via fs.s3a.endpoint, set it.
    *     If no region is configured, try to parse region from endpoint. </li>
   * <li> If no region is configured, and it could not be parsed from the endpoint,
-   *     set the default region as US_EAST_2 and enable cross region access. </li>
+   *     set the default region as US_EAST_2</li>
   * <li> If configured region is empty, fallback to SDK resolution chain. </li>
+   * <li> S3 cross region access is enabled by default, irrespective of whether
+   *      a region or endpoint is set.</li>
    * </ol>
    *
    * @param builder S3 client builder.
@@ -320,7 +324,6 @@ public class DefaultS3ClientFactory extends Configured
         builder.endpointOverride(endpoint);
         LOG.debug("Setting endpoint to {}", endpoint);
       } else {
-        builder.crossRegionAccessEnabled(true);
         origin = "central endpoint with cross region access";
         LOG.debug("Enabling cross region access for endpoint {}",
             endpointStr);
@@ -333,7 +336,6 @@ public class DefaultS3ClientFactory extends Configured
      // no region is configured, and none could be determined from the endpoint.
       // Use US_EAST_2 as default.
       region = Region.of(AWS_S3_DEFAULT_REGION);
-      builder.crossRegionAccessEnabled(true);
       builder.region(region);
       origin = "cross region access fallback";
     } else if (configuredRegion.isEmpty()) {
@@ -344,8 +346,14 @@ public class DefaultS3ClientFactory extends Configured
       LOG.debug(SDK_REGION_CHAIN_IN_USE);
       origin = "SDK region chain";
     }
-
-    LOG.debug("Setting region to {} from {}", region, origin);
+    boolean isCrossRegionAccessEnabled = conf.getBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED,
+        AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT);
+    // s3 cross region access
+    if (isCrossRegionAccessEnabled) {
+      builder.crossRegionAccessEnabled(true);
+    }
+    LOG.debug("Setting region to {} from {} with cross region access {}",
+        region, origin, isCrossRegionAccessEnabled);
   }
 
   /**
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md
index d39c480b7cc..6fa37750ded 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md
@@ -48,6 +48,16 @@ There are multiple ways to connect to an S3 bucket
 
 The S3A connector supports all these; S3 Endpoints are the primary mechanism used -either explicitly declared or automatically determined from the declared region of the bucket.
 
+The S3A connector supports S3 cross region access via the AWS SDK; it is enabled by default. This allows users to access S3 buckets in a region different from the one defined in the S3 endpoint/region configuration, as long as they are within the same AWS partition. Cross region access can be disabled by setting:
+```xml
+<property>
+  <name>fs.s3a.cross.region.access.enabled</name>
+  <value>false</value>
+  <description>S3 cross region access</description>
+</property>
+```
+
+
 Not supported:
 * AWS [Snowball](https://aws.amazon.com/snowball/).
 
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java
index a3b994054e4..967ba885bc9 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java
@@ -439,6 +439,7 @@ public class ITestS3AConfiguration {
   @Test
   public void testRequestTimeout() throws Exception {
     conf = new Configuration();
+    skipIfCrossRegionClient(conf);
     // remove the safety check on minimum durations.
     AWSClientConfig.setMinimumOperationDuration(Duration.ZERO);
     try {
@@ -632,8 +633,8 @@ public class ITestS3AConfiguration {
    */
   private static void skipIfCrossRegionClient(
       Configuration configuration) {
-    if (configuration.get(ENDPOINT, null) == null
-        && configuration.get(AWS_REGION, null) == null) {
+    if (configuration.getBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED,
+        AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT)) {
       skip("Skipping test as cross region client is in use ");
     }
   }
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java
index d06224df5b3..80b061de031 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java
@@ -44,8 +44,10 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext;
 import org.apache.hadoop.fs.s3a.test.PublicDatasetTestUtils;
 
+import static org.apache.hadoop.fs.contract.ContractTestUtils.skip;
 import static org.apache.hadoop.fs.s3a.Constants.ALLOW_REQUESTER_PAYS;
 import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION;
+import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CROSS_REGION_ACCESS_ENABLED;
 import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT;
 import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT;
 import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT;
@@ -71,6 +73,8 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase {
 
   private static final String US_WEST_2 = "us-west-2";
 
+  private static final String SA_EAST_1 = "sa-east-1";
+
   private static final String EU_WEST_2 = "eu-west-2";
 
   private static final String CN_NORTHWEST_1 = "cn-northwest-1";
@@ -346,6 +350,46 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase {
     assertRequesterPaysFileExistence(newConf);
   }
 
+  @Test
+  public void testWithOutCrossRegionAccess() throws Exception {
+    describe("Verify cross region access fails when disabled");
+    // skip the test if the bucket region is null or sa-east-1
+    skipCrossRegionTest();
+    final Configuration newConf = new Configuration(getConfiguration());
+    removeBaseAndBucketOverrides(newConf,
+        ENDPOINT,
+        AWS_S3_CROSS_REGION_ACCESS_ENABLED,
+        AWS_REGION);
+    // disable cross region access
+    newConf.setBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED, false);
+    newConf.set(AWS_REGION, SA_EAST_1);
+    try (S3AFileSystem fs = new S3AFileSystem()) {
+      fs.initialize(getFileSystem().getUri(), newConf);
+      intercept(AWSRedirectException.class,
+          "does not match the AWS region containing the bucket",
+          () -> fs.exists(getFileSystem().getWorkingDirectory()));
+    }
+  }
+
+  @Test
+  public void testWithCrossRegionAccess() throws Exception {
+    describe("Verify cross region access succeed when enabled");
+    // skip the test if the bucket region is null or sa-east-1
+    skipCrossRegionTest();
+    final Configuration newConf = new Configuration(getConfiguration());
+    removeBaseAndBucketOverrides(newConf,
+        ENDPOINT,
+        AWS_S3_CROSS_REGION_ACCESS_ENABLED,
+        AWS_REGION);
+    // enable cross region access
+    newConf.setBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED, true);
+    newConf.set(AWS_REGION, SA_EAST_1);
+    try (S3AFileSystem fs = new S3AFileSystem()) {
+      fs.initialize(getFileSystem().getUri(), newConf);
+      fs.exists(getFileSystem().getWorkingDirectory());
+    }
+  }
+
   @Test
   public void testCentralEndpointAndSameRegionAsBucket() throws Throwable {
     describe("Access public bucket using central endpoint and region "
@@ -478,6 +522,16 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase {
     assertOpsUsingNewFs();
   }
 
+  /**
+   * Skip the test if the region is null or sa-east-1.
+   */
+  private void skipCrossRegionTest() throws IOException {
+    String region = getFileSystem().getS3AInternals().getBucketMetadata().bucketRegion();
+    if (region == null || SA_EAST_1.equals(region)) {
+      skip("Skipping test since region is null or it is set to sa-east-1");
+    }
+  }
+
   private void assertOpsUsingNewFs() throws IOException {
     final String file = getMethodName();
     final Path basePath = methodPath();

