This is an automated email from the ASF dual-hosted git repository. stevel pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.3 by this push: new a3bbeba0f1e HADOOP-18328. S3A to support S3 on Outposts (#4533) a3bbeba0f1e is described below commit a3bbeba0f1e56da25aaf80b0ab62813a23ac7310 Author: suzu <s.suzugam...@gmail.com> AuthorDate: Wed Aug 23 19:38:07 2023 +0900 HADOOP-18328. S3A to support S3 on Outposts (#4533) Contributed by Sotetsu Suzugamine HADOOP-18328. Add documentation for S3A support on S3 Outposts (#5976) Contributed by Yuting Chen --- .../java/org/apache/hadoop/fs/s3a/ArnResource.java | 10 +++++++-- .../src/site/markdown/tools/hadoop-aws/index.md | 18 ++++++++++++++++ .../org/apache/hadoop/fs/s3a/TestArnResource.java | 24 +++++++++++++++++----- 3 files changed, 45 insertions(+), 7 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java index 0294f772290..a85f26223ff 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java @@ -26,7 +26,8 @@ import com.amazonaws.arn.Arn; * Represents an Arn Resource, this can be an accesspoint or bucket. */ public final class ArnResource { - private final static String ACCESSPOINT_ENDPOINT_FORMAT = "s3-accesspoint.%s.amazonaws.com"; + private final static String S3_ACCESSPOINT_ENDPOINT_FORMAT = "s3-accesspoint.%s.amazonaws.com"; + private final static String S3_OUTPOSTS_ACCESSPOINT_ENDPOINT_FORMAT = "s3-outposts.%s.amazonaws.com"; /** * Resource name. @@ -69,6 +70,10 @@ public final class ArnResource { this.accessPointRegionKey = String.format("accesspoint-%s", region); } + private boolean isOutposts(){ + return fullArn.contains("s3-outposts"); + } + /** * Resource name. * @return resource name. @@ -106,7 +111,8 @@ public final class ArnResource { * @return resource endpoint. 
*/ public String getEndpoint() { - return String.format(ACCESSPOINT_ENDPOINT_FORMAT, region); + String format = isOutposts() ? S3_OUTPOSTS_ACCESSPOINT_ENDPOINT_FORMAT : S3_ACCESSPOINT_ENDPOINT_FORMAT; + return String.format(format, region); } /** diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index 884cc781df9..0c787de4676 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -1709,6 +1709,24 @@ the storage class you want. Please note that S3A does not support reading from archive storage classes at the moment. `AccessDeniedException` with InvalidObjectState will be thrown if you're trying to do so. +## <a name="outposts"></a>Configuring S3A for S3 on Outposts +
+S3A now supports [S3 on Outposts](https://docs.aws.amazon.com/AmazonS3/latest/userguide/S3onOutposts.html). +Accessing data through an access point is done by using its Amazon Resource Name (ARN), as opposed to just the bucket name. +The only supported storage class on Outposts is **OUTPOSTS**, and by default objects are encrypted with [SSE-S3](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-outposts-data-encryption.html). +You can set the Access Point ARN property using the following per-bucket configuration property: + +```xml +<property> + <name>fs.s3a.bucket.sample-outpost-bucket.accesspoint.arn</name> + <value>arn:aws:s3-outposts:region:account-id:outpost/outpost-id/accesspoint/accesspoint-name</value> + <description>Configure S3A traffic to use this S3 on Outposts Access Point ARN</description> +</property> +``` + +This configures access to the `sample-outpost-bucket` for S3A to go through the new Access Point ARN. So, for example, `s3a://sample-outpost-bucket/key` will now use your configured ARN when getting data from S3 on Outposts instead of your bucket. 
+ + ## <a name="upload"></a>How S3A writes data to S3 The original S3A client implemented file writes by diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java index 36381bf14b1..c881aac35d9 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java @@ -56,7 +56,7 @@ public class TestArnResource extends HadoopTestBase { String region = testPair[0]; String partition = testPair[1]; - ArnResource resource = getArnResourceFrom(partition, region, MOCK_ACCOUNT, accessPoint); + ArnResource resource = getArnResourceFrom(partition, "s3", region, MOCK_ACCOUNT, accessPoint); assertEquals("Access Point name does not match", accessPoint, resource.getName()); assertEquals("Account Id does not match", MOCK_ACCOUNT, resource.getOwnerAccountId()); assertEquals("Region does not match", region, resource.getRegion()); @@ -64,10 +64,10 @@ public class TestArnResource extends HadoopTestBase { } @Test - public void makeSureEndpointHasTheCorrectFormat() { + public void makeSureS3EndpointHasTheCorrectFormat() { // Access point (AP) endpoints are different from S3 bucket endpoints, thus when using APs the // endpoints for the client are modified. This test makes sure endpoint is set up correctly. - ArnResource accessPoint = getArnResourceFrom("aws", "eu-west-1", MOCK_ACCOUNT, + ArnResource accessPoint = getArnResourceFrom("aws", "s3", "eu-west-1", MOCK_ACCOUNT, "test"); String expected = "s3-accesspoint.eu-west-1.amazonaws.com"; @@ -76,6 +76,19 @@ public class TestArnResource extends HadoopTestBase { .isEqualTo(expected); } + @Test + public void makeSureS3OutpostsEndpointHasTheCorrectFormat() { + // Access point (AP) endpoints are different from S3 bucket endpoints, thus when using APs the + // endpoints for the client are modified. 
This test makes sure endpoint is set up correctly. + ArnResource accessPoint = getArnResourceFrom("aws", "s3-outposts", "eu-west-1", MOCK_ACCOUNT, + "test"); + String expected = "s3-outposts.eu-west-1.amazonaws.com"; + + Assertions.assertThat(accessPoint.getEndpoint()) + .describedAs("Endpoint has invalid format. Access Point requests will not work") + .isEqualTo(expected); + } + @Test public void invalidARNsMustThrow() throws Exception { describe("Using an invalid ARN format must throw when initializing an ArnResource."); @@ -87,15 +100,16 @@ public class TestArnResource extends HadoopTestBase { /** * Create an {@link ArnResource} from string components * @param partition - partition for ARN + * @param service - service for ARN * @param region - region for ARN * @param accountId - accountId for ARN * @param resourceName - ARN resource name * @return ArnResource described by its properties */ - private ArnResource getArnResourceFrom(String partition, String region, String accountId, + private ArnResource getArnResourceFrom(String partition, String service, String region, String accountId, String resourceName) { // arn:partition:service:region:account-id:resource-type/resource-id - String arn = String.format("arn:%s:s3:%s:%s:accesspoint/%s", partition, region, accountId, + String arn = String.format("arn:%s:%s:%s:%s:accesspoint/%s", partition, service, region, accountId, resourceName); return ArnResource.accessPointFromArn(arn); --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org