dannycjones commented on code in PR #4706:
URL: https://github.com/apache/hadoop/pull/4706#discussion_r954753108
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java:
##########
@@ -390,4 +487,68 @@ protected static AmazonS3 configureAmazonS3Client(AmazonS3
s3,
endpoint, epr, region);
return new AwsClientBuilder.EndpointConfiguration(endpoint, region);
}
+
+ /**
+ * Given a endpoint string, create the endpoint URI.
+ *
+ * @param endpoint possibly null endpoint.
+ * @param conf config to build the URI from.
+ * @return an endpoint uri
+ */
+ private static URI getS3Endpoint(String endpoint, final Configuration conf) {
+
+ boolean secureConnections = conf.getBoolean(SECURE_CONNECTIONS,
DEFAULT_SECURE_CONNECTIONS);
+
+ String protocol = secureConnections ? "https" : "http";
+
+ if (endpoint == null || endpoint.isEmpty()) {
+ // the default endpoint
+ endpoint = CENTRAL_ENDPOINT;
+ }
+
+ if (!endpoint.contains("://")) {
+ endpoint = String.format("%s://%s", protocol, endpoint);
+ }
+
+ try {
+ return new URI(endpoint);
+ } catch (URISyntaxException e) {
+ throw new IllegalArgumentException(e);
+ }
+ }
+
+ /**
+ * Get the bucket region.
+ *
+ * @param region AWS S3 Region set in the config. This property may not be
set, in which case
+ * ask S3 for the region.
+ * @param credentialsProvider Credentials provider to be used with the
default s3 client.
+ * @return region of the bucket.
+ */
+ private Region getS3Region(String region, AWSCredentialsProvider
credentialsProvider) {
+
+ if (!StringUtils.isBlank(region)) {
+ return Region.of(region);
+ }
+
+ try {
+ // build a s3 client with region eu-west-2 that can be used to get the
region of the bucket.
+ S3Client s3Client = S3Client.builder().region(Region.EU_WEST_2)
+
.credentialsProvider(V1V2AwsCredentialProviderAdapter.adapt(credentialsProvider))
+ .build();
+
+ HeadBucketResponse headBucketResponse =
+
s3Client.headBucket(HeadBucketRequest.builder().bucket(bucket).build());
+ return Region.of(
+
headBucketResponse.sdkHttpResponse().headers().get("x-amz-bucket-region").get(0));
+ } catch (S3Exception exception) {
+ if (exception.statusCode() == 301) {
+ List<String> bucketRegion =
+
exception.awsErrorDetails().sdkHttpResponse().headers().get("x-amz-bucket-region");
Review Comment:
Let's add a constant for `x-amz-bucket-region` since its used twice already
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java:
##########
@@ -1203,4 +1203,5 @@ private Constants() {
* Default maximum read size in bytes during vectored reads : {@value}.
*/
public static final int DEFAULT_AWS_S3_VECTOR_READS_MAX_MERGED_READ_SIZE =
1253376; //1M
+
Review Comment:
I'd try and avoid changes like this reviewing the diff in SourceTree or CLI
before pushing. (not blocking this patch)
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java:
##########
@@ -390,4 +487,68 @@ protected static AmazonS3 configureAmazonS3Client(AmazonS3
s3,
endpoint, epr, region);
return new AwsClientBuilder.EndpointConfiguration(endpoint, region);
}
+
+ /**
+ * Given a endpoint string, create the endpoint URI.
+ *
+ * @param endpoint possibly null endpoint.
+ * @param conf config to build the URI from.
+ * @return an endpoint uri
+ */
+ private static URI getS3Endpoint(String endpoint, final Configuration conf) {
+
+ boolean secureConnections = conf.getBoolean(SECURE_CONNECTIONS,
DEFAULT_SECURE_CONNECTIONS);
+
+ String protocol = secureConnections ? "https" : "http";
+
+ if (endpoint == null || endpoint.isEmpty()) {
+ // the default endpoint
+ endpoint = CENTRAL_ENDPOINT;
+ }
+
+ if (!endpoint.contains("://")) {
+ endpoint = String.format("%s://%s", protocol, endpoint);
+ }
+
+ try {
+ return new URI(endpoint);
+ } catch (URISyntaxException e) {
+ throw new IllegalArgumentException(e);
+ }
+ }
+
+ /**
+ * Get the bucket region.
+ *
+ * @param region AWS S3 Region set in the config. This property may not be
set, in which case
+ * ask S3 for the region.
+ * @param credentialsProvider Credentials provider to be used with the
default s3 client.
+ * @return region of the bucket.
+ */
+ private Region getS3Region(String region, AWSCredentialsProvider
credentialsProvider) {
+
+ if (!StringUtils.isBlank(region)) {
+ return Region.of(region);
+ }
+
+ try {
+ // build a s3 client with region eu-west-2 that can be used to get the
region of the bucket.
+ S3Client s3Client = S3Client.builder().region(Region.EU_WEST_2)
+
.credentialsProvider(V1V2AwsCredentialProviderAdapter.adapt(credentialsProvider))
+ .build();
+
+ HeadBucketResponse headBucketResponse =
+
s3Client.headBucket(HeadBucketRequest.builder().bucket(bucket).build());
+ return Region.of(
+
headBucketResponse.sdkHttpResponse().headers().get("x-amz-bucket-region").get(0));
+ } catch (S3Exception exception) {
+ if (exception.statusCode() == 301) {
Review Comment:
Constant for 301? There's one in the SDK:
https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/http/HttpStatusCode.html#MOVED_PERMANENTLY
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java:
##########
@@ -390,4 +487,68 @@ protected static AmazonS3 configureAmazonS3Client(AmazonS3
s3,
endpoint, epr, region);
return new AwsClientBuilder.EndpointConfiguration(endpoint, region);
}
+
+ /**
+ * Given a endpoint string, create the endpoint URI.
+ *
+ * @param endpoint possibly null endpoint.
+ * @param conf config to build the URI from.
+ * @return an endpoint uri
+ */
+ private static URI getS3Endpoint(String endpoint, final Configuration conf) {
+
+ boolean secureConnections = conf.getBoolean(SECURE_CONNECTIONS,
DEFAULT_SECURE_CONNECTIONS);
+
+ String protocol = secureConnections ? "https" : "http";
+
+ if (endpoint == null || endpoint.isEmpty()) {
+ // the default endpoint
+ endpoint = CENTRAL_ENDPOINT;
+ }
+
+ if (!endpoint.contains("://")) {
+ endpoint = String.format("%s://%s", protocol, endpoint);
+ }
+
+ try {
+ return new URI(endpoint);
+ } catch (URISyntaxException e) {
+ throw new IllegalArgumentException(e);
+ }
+ }
+
+ /**
+ * Get the bucket region.
+ *
+ * @param region AWS S3 Region set in the config. This property may not be
set, in which case
+ * ask S3 for the region.
+ * @param credentialsProvider Credentials provider to be used with the
default s3 client.
+ * @return region of the bucket.
+ */
+ private Region getS3Region(String region, AWSCredentialsProvider
credentialsProvider) {
+
+ if (!StringUtils.isBlank(region)) {
+ return Region.of(region);
+ }
+
+ try {
+ // build a s3 client with region eu-west-2 that can be used to get the
region of the bucket.
+ S3Client s3Client = S3Client.builder().region(Region.EU_WEST_2)
Review Comment:
`eu-west-2` / London seems a bit of a random decision.
Let's be intentional by using the
[`AWS_GLOBAL`](https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/regions/Region.html#AWS_GLOBAL)
region value - this will result in `us-east-1`. (Only works for commercial
regions. Not sure if we can do anything better for China / US Gov Cloud?)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]