This is an automated email from the ASF dual-hosted git repository. stevel pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.4 by this push: new cf5800a9827 HADOOP-19286: S3A: Support cross region access when S3 region/endpoint is set (#7093) (#7067) cf5800a9827 is described below commit cf5800a9827540f7cec83d93d2861ac806bc885c Author: Syed Shameerur Rahman <rhma...@amazon.com> AuthorDate: Tue Oct 8 22:23:00 2024 +0530 HADOOP-19286: S3A: Support cross region access when S3 region/endpoint is set (#7093) (#7067) Adds new option fs.s3a.cross.region.access.enabled Which is true by default This enables cross region access as a separate config and enables/disables it irrespective of whether region/endpoint is set. This commit contains (ADDENDUM) (#7098) Contributed by Syed Shameerur Rahman --- .../java/org/apache/hadoop/fs/s3a/Constants.java | 13 ++++++ .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 18 ++++++-- .../site/markdown/tools/hadoop-aws/connecting.md | 10 ++++ .../hadoop/fs/s3a/ITestS3AConfiguration.java | 5 +- .../hadoop/fs/s3a/ITestS3AEndpointRegion.java | 54 ++++++++++++++++++++++ 5 files changed, 93 insertions(+), 7 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index cf7bc3ddcf2..eff42c1050f 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -1362,6 +1362,19 @@ public final class Constants { */ public static final String XA_HEADER_PREFIX = "header."; + /** + * S3 cross region access enabled? + * Value: {@value}. + */ + + public static final String AWS_S3_CROSS_REGION_ACCESS_ENABLED = + "fs.s3a.cross.region.access.enabled"; + /** + * Default value for S3 cross region access enabled: {@value}. + */ + public static final boolean AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT = true; + + /** + * AWS S3 region for the bucket. 
When set bypasses the construction of * region through endpoint url. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 4b3db999247..c9c3eee30ea 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -55,6 +55,8 @@ import org.apache.hadoop.fs.s3a.statistics.impl.AwsStatisticsCollector; import org.apache.hadoop.fs.store.LogExactlyOnce; import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; +import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CROSS_REGION_ACCESS_ENABLED; +import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT; import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_DEFAULT_REGION; import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT; import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT; @@ -259,8 +261,10 @@ public class DefaultS3ClientFactory extends Configured * <li>If endpoint is configured via fs.s3a.endpoint, set it. * If no region is configured, try to parse region from endpoint. </li> * <li> If no region is configured, and it could not be parsed from the endpoint, - * set the default region as US_EAST_2 and enable cross region access. </li> + * set the default region as US_EAST_2</li> * <li> If configured region is empty, fallback to SDK resolution chain. </li> + * <li> S3 cross region access is enabled by default irrespective of whether region or endpoint + * is set.</li> * </ol> * * @param builder S3 client builder. 
@@ -320,7 +324,6 @@ public class DefaultS3ClientFactory extends Configured builder.endpointOverride(endpoint); LOG.debug("Setting endpoint to {}", endpoint); } else { - builder.crossRegionAccessEnabled(true); origin = "central endpoint with cross region access"; LOG.debug("Enabling cross region access for endpoint {}", endpointStr); @@ -333,7 +336,6 @@ public class DefaultS3ClientFactory extends Configured // no region is configured, and none could be determined from the endpoint. // Use US_EAST_2 as default. region = Region.of(AWS_S3_DEFAULT_REGION); - builder.crossRegionAccessEnabled(true); builder.region(region); origin = "cross region access fallback"; } else if (configuredRegion.isEmpty()) { @@ -344,8 +346,14 @@ public class DefaultS3ClientFactory extends Configured LOG.debug(SDK_REGION_CHAIN_IN_USE); origin = "SDK region chain"; } - - LOG.debug("Setting region to {} from {}", region, origin); + boolean isCrossRegionAccessEnabled = conf.getBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED, + AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT); + // s3 cross region access + if (isCrossRegionAccessEnabled) { + builder.crossRegionAccessEnabled(true); + } + LOG.debug("Setting region to {} from {} with cross region access {}", + region, origin, isCrossRegionAccessEnabled); } /** diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md index d39c480b7cc..6fa37750ded 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md @@ -48,6 +48,16 @@ There are multiple ways to connect to an S3 bucket The S3A connector supports all these; S3 Endpoints are the primary mechanism used -either explicitly declared or automatically determined from the declared region of the bucket. +The S3A connector supports S3 cross region access via AWS SDK which is enabled by default. 
This allows users to access S3 buckets in a different region than the one defined in the S3 endpoint/region configuration, as long as they are within the same AWS partition. However, S3 cross-region access can be disabled by: +```xml +<property> + <name>fs.s3a.cross.region.access.enabled</name> + <value>false</value> + <description>S3 cross region access</description> +</property> +``` + + Not supported: * AWS [Snowball](https://aws.amazon.com/snowball/). diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java index a3b994054e4..967ba885bc9 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java @@ -439,6 +439,7 @@ public class ITestS3AConfiguration { @Test public void testRequestTimeout() throws Exception { conf = new Configuration(); + skipIfCrossRegionClient(conf); // remove the safety check on minimum durations. 
AWSClientConfig.setMinimumOperationDuration(Duration.ZERO); try { @@ -632,8 +633,8 @@ public class ITestS3AConfiguration { */ private static void skipIfCrossRegionClient( Configuration configuration) { - if (configuration.get(ENDPOINT, null) == null - && configuration.get(AWS_REGION, null) == null) { + if (configuration.getBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED, + AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT)) { skip("Skipping test as cross region client is in use "); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java index d06224df5b3..80b061de031 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java @@ -44,8 +44,10 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext; import org.apache.hadoop.fs.s3a.test.PublicDatasetTestUtils; +import static org.apache.hadoop.fs.contract.ContractTestUtils.skip; import static org.apache.hadoop.fs.s3a.Constants.ALLOW_REQUESTER_PAYS; import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; +import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CROSS_REGION_ACCESS_ENABLED; import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT; import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT; import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT; @@ -71,6 +73,8 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase { private static final String US_WEST_2 = "us-west-2"; + private static final String SA_EAST_1 = "sa-east-1"; + private static final String EU_WEST_2 = "eu-west-2"; private static final String CN_NORTHWEST_1 = "cn-northwest-1"; @@ -346,6 +350,46 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase { 
assertRequesterPaysFileExistence(newConf); } + @Test + public void testWithOutCrossRegionAccess() throws Exception { + describe("Verify cross region access fails when disabled"); + // skip the test if the region is sa-east-1 + skipCrossRegionTest(); + final Configuration newConf = new Configuration(getConfiguration()); + removeBaseAndBucketOverrides(newConf, + ENDPOINT, + AWS_S3_CROSS_REGION_ACCESS_ENABLED, + AWS_REGION); + // disable cross region access + newConf.setBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED, false); + newConf.set(AWS_REGION, SA_EAST_1); + try (S3AFileSystem fs = new S3AFileSystem()) { + fs.initialize(getFileSystem().getUri(), newConf); + intercept(AWSRedirectException.class, + "does not match the AWS region containing the bucket", + () -> fs.exists(getFileSystem().getWorkingDirectory())); + } + } + + @Test + public void testWithCrossRegionAccess() throws Exception { + describe("Verify cross region access succeed when enabled"); + // skip the test if the region is sa-east-1 + skipCrossRegionTest(); + final Configuration newConf = new Configuration(getConfiguration()); + removeBaseAndBucketOverrides(newConf, + ENDPOINT, + AWS_S3_CROSS_REGION_ACCESS_ENABLED, + AWS_REGION); + // enable cross region access + newConf.setBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED, true); + newConf.set(AWS_REGION, SA_EAST_1); + try (S3AFileSystem fs = new S3AFileSystem()) { + fs.initialize(getFileSystem().getUri(), newConf); + fs.exists(getFileSystem().getWorkingDirectory()); + } + } + @Test public void testCentralEndpointAndSameRegionAsBucket() throws Throwable { describe("Access public bucket using central endpoint and region " @@ -478,6 +522,16 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase { assertOpsUsingNewFs(); } + /** + * Skip the test if the region is null or sa-east-1. 
+ */ + private void skipCrossRegionTest() throws IOException { + String region = getFileSystem().getS3AInternals().getBucketMetadata().bucketRegion(); + if (region == null || SA_EAST_1.equals(region)) { + skip("Skipping test since region is null or it is set to sa-east-1"); + } + } + private void assertOpsUsingNewFs() throws IOException { final String file = getMethodName(); final Path basePath = methodPath(); --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org