[
https://issues.apache.org/jira/browse/HADOOP-18073?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17583562#comment-17583562
]
ASF GitHub Bot commented on HADOOP-18073:
-----------------------------------------
dannycjones commented on code in PR #4706:
URL: https://github.com/apache/hadoop/pull/4706#discussion_r951368873
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java:
##########
@@ -0,0 +1,231 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.time.Duration;
+import java.util.concurrent.TimeUnit;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration;
+import software.amazon.awssdk.core.client.config.SdkAdvancedClientOption;
+import software.amazon.awssdk.core.retry.RetryPolicy;
+import software.amazon.awssdk.http.apache.ApacheHttpClient;
+import software.amazon.awssdk.http.apache.ProxyConfiguration;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.VersionInfo;
+import org.apache.http.client.utils.URIBuilder;
+
+import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_ESTABLISH_TIMEOUT;
+import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_MAXIMUM_CONNECTIONS;
+import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_MAX_ERROR_RETRIES;
+import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_REQUEST_TIMEOUT;
+import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_SECURE_CONNECTIONS;
+import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_SOCKET_TIMEOUT;
+import static org.apache.hadoop.fs.s3a.Constants.ESTABLISH_TIMEOUT;
+import static org.apache.hadoop.fs.s3a.Constants.MAXIMUM_CONNECTIONS;
+import static org.apache.hadoop.fs.s3a.Constants.MAX_ERROR_RETRIES;
+import static org.apache.hadoop.fs.s3a.Constants.PROXY_DOMAIN;
+import static org.apache.hadoop.fs.s3a.Constants.PROXY_HOST;
+import static org.apache.hadoop.fs.s3a.Constants.PROXY_PASSWORD;
+import static org.apache.hadoop.fs.s3a.Constants.PROXY_PORT;
+import static org.apache.hadoop.fs.s3a.Constants.PROXY_USERNAME;
+import static org.apache.hadoop.fs.s3a.Constants.PROXY_WORKSTATION;
+import static org.apache.hadoop.fs.s3a.Constants.REQUEST_TIMEOUT;
+import static org.apache.hadoop.fs.s3a.Constants.SECURE_CONNECTIONS;
+import static org.apache.hadoop.fs.s3a.Constants.SOCKET_TIMEOUT;
+import static org.apache.hadoop.fs.s3a.Constants.USER_AGENT_PREFIX;
+
+public final class AWSClientConfig {
Review Comment:
Add JavaDoc. At first glance, I'm thinking this is actually an SDK class, so
let's make its purpose clear. Or maybe rename it if we can think of a better name.
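Something along these lines as a starting point (wording is just a sketch):
```java
/**
 * Factory methods for configuring the AWS SDK v2 clients used by S3A:
 * timeouts, HTTP client, proxy settings, retry policy and user agent,
 * all derived from the Hadoop {@link Configuration}.
 */
```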
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java:
##########
@@ -0,0 +1,231 @@
+ [... Apache license header and imports, identical to the first excerpt above ...]
+
+public final class AWSClientConfig {
+  private static final Logger LOG =
+      LoggerFactory.getLogger(AWSClientConfig.class);
+
+  private AWSClientConfig() {
+  }
+
+  public static ClientOverrideConfiguration.Builder
+      createClientConfigBuilder(Configuration conf) {
+    ClientOverrideConfiguration.Builder overrideConfigBuilder =
+        ClientOverrideConfiguration.builder();
+
+    long requestTimeoutMillis = conf.getTimeDuration(REQUEST_TIMEOUT,
+        DEFAULT_REQUEST_TIMEOUT, TimeUnit.SECONDS, TimeUnit.MILLISECONDS);
+
+    if (requestTimeoutMillis > Integer.MAX_VALUE) {
+      LOG.debug("Request timeout is too high({} ms). Setting to {} ms instead",
+          requestTimeoutMillis, Integer.MAX_VALUE);
+      requestTimeoutMillis = Integer.MAX_VALUE;
+    }
+
+    if (requestTimeoutMillis > 0) {
+      overrideConfigBuilder.apiCallAttemptTimeout(Duration.ofMillis(requestTimeoutMillis));
+    }
Review Comment:
Maybe we can move this to a private method like
`initApiCallTimeout`/`initRequestTimeout`, similar to `initUserAgent`, since
lines 69 thru 80 are all tightly related.
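For example, a rough sketch reusing the code above (exact name up for debate):
```java
// Sketch: pull the request timeout handling into its own helper,
// mirroring initUserAgent.
private static void initRequestTimeout(Configuration conf,
    ClientOverrideConfiguration.Builder overrideConfigBuilder) {
  long requestTimeoutMillis = conf.getTimeDuration(REQUEST_TIMEOUT,
      DEFAULT_REQUEST_TIMEOUT, TimeUnit.SECONDS, TimeUnit.MILLISECONDS);

  if (requestTimeoutMillis > Integer.MAX_VALUE) {
    LOG.debug("Request timeout is too high({} ms). Setting to {} ms instead",
        requestTimeoutMillis, Integer.MAX_VALUE);
    requestTimeoutMillis = Integer.MAX_VALUE;
  }

  if (requestTimeoutMillis > 0) {
    overrideConfigBuilder.apiCallAttemptTimeout(
        Duration.ofMillis(requestTimeoutMillis));
  }
}
```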
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java:
##########
@@ -451,21 +452,21 @@ private boolean buildNextStatusBatch(S3ListResult objects) {
int added = 0, ignored = 0;
// list to fill in with results. Initial size will be list maximum.
List<S3AFileStatus> stats = new ArrayList<>(
- objects.getObjectSummaries().size() +
+ objects.getS3Objects().size() +
objects.getCommonPrefixes().size());
// objects
- for (S3ObjectSummary summary : objects.getObjectSummaries()) {
- String key = summary.getKey();
+ for (S3Object s3Object : objects.getS3Objects()) {
+ String key = s3Object.key();
Path keyPath = getStoreContext().getContextAccessors().keyToPath(key);
if (LOG.isDebugEnabled()) {
- LOG.debug("{}: {}", keyPath, stringify(summary));
+ LOG.debug("{}: {}", keyPath, stringify(s3Object));
}
// Skip over keys that are ourselves and old S3N _$folder$ files
- if (acceptor.accept(keyPath, summary) && filter.accept(keyPath)) {
- S3AFileStatus status = createFileStatus(keyPath, summary,
+ if (acceptor.accept(keyPath, s3Object) && filter.accept(keyPath)) {
+ S3AFileStatus status = createFileStatus(keyPath, s3Object,
listingOperationCallbacks.getDefaultBlockSize(keyPath),
getStoreContext().getUsername(),
- summary.getETag(), null, isCSEEnabled);
+ s3Object.eTag(), null, isCSEEnabled);
Review Comment:
Fix indent while this line is modified
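e.g. something like this, with the exact depth matching the other continuation lines of the `createFileStatus` call:
```suggestion
          s3Object.eTag(), null, isCSEEnabled);
```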
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java:
##########
@@ -1203,4 +1203,8 @@ private Constants() {
* Default maximum read size in bytes during vectored reads : {@value}.
*/
public static final int DEFAULT_AWS_S3_VECTOR_READS_MAX_MERGED_READ_SIZE =
    1253376; //1M
+
+ public static final String HTTP = "http";
+
+ public static final String HTTPS = "https";
Review Comment:
I'd prefer to keep these in DefaultS3ClientFactory rather than add them to
Constants now, or to not use them at all; they're only referred to once and I
don't see much value in them at the moment.
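If we do keep them, private constants in the factory would be enough (sketch):
```java
// In DefaultS3ClientFactory, scoped to the only place they're used:
private static final String HTTP = "http";
private static final String HTTPS = "https";
```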
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java:
##########
@@ -160,6 +182,84 @@ public AmazonS3 createS3Client(
}
}
+ /**
+ * Creates a new {@link S3Client}.
+ *
+ * @param uri S3A file system URI
+ * @param parameters parameter object
+ * @return S3 client
+ * @throws IOException on any IO problem
+ */
+ @Override
+ public S3Client createS3ClientV2(
+ final URI uri,
+ final S3ClientCreationParameters parameters) throws IOException {
+
+ Configuration conf = getConf();
+ bucket = uri.getHost();
+
+ final ClientOverrideConfiguration.Builder clientOverrideConfigBuilder =
+ AWSClientConfig.createClientConfigBuilder(conf);
+
+ final ApacheHttpClient.Builder httpClientBuilder =
+ AWSClientConfig.createHttpClientBuilder(conf);
+
+    final RetryPolicy.Builder retryPolicyBuilder =
+        AWSClientConfig.createRetryPolicyBuilder(conf);
+
+ final ProxyConfiguration.Builder proxyConfigBuilder =
+ AWSClientConfig.createProxyConfigurationBuilder(conf, bucket);
+
+ S3ClientBuilder s3ClientBuilder = S3Client.builder();
+
+    // build a s3 client with region eu-west-2 that can be used to get the region of the bucket.
+    S3Client defaultS3Client = S3Client.builder().region(Region.EU_WEST_2)
+        .credentialsProvider(V1V2AwsCredentialProviderAdapter.adapt(parameters.getCredentialSet()))
+        .build();
Review Comment:
Move this into `getS3Region`? It's only used for determining region - we
don't even need to create the client if the S3A user already specifies
`fs.s3a.endpoint.region`.
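Rough shape of what I mean; `lookupRegion` stands in for whatever the current lookup logic is, and the parameter types are illustrative:
```java
// Sketch only: skip building the throwaway client when the region is configured.
private Region getS3Region(String region, AWSCredentialProviderList credentials)
    throws IOException {
  if (!StringUtils.isEmpty(region)) {
    return Region.of(region);
  }
  // No fs.s3a.endpoint.region set: only now pay for a client to resolve it.
  S3Client lookupClient = S3Client.builder()
      .region(Region.EU_WEST_2)
      .credentialsProvider(V1V2AwsCredentialProviderAdapter.adapt(credentials))
      .build();
  return lookupRegion(lookupClient);  // hypothetical helper: the existing lookup
}
```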
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java:
##########
@@ -0,0 +1,231 @@
+ [... license header, imports, and the createClientConfigBuilder timeout code shown in the earlier excerpts ...]
+
+    initUserAgent(conf, overrideConfigBuilder);
+
+    // TODO: Look at signers. See issue https://github.com/aws/aws-sdk-java-v2/issues/1024
+    // String signerOverride = conf.getTrimmed(SIGNING_ALGORITHM, "");
+    // if (!signerOverride.isEmpty()) {
+    //   LOG.debug("Signer override = {}", signerOverride);
+    //   overrideConfigBuilder.putAdvancedOption(SdkAdvancedClientOption.SIGNER)
+    // }
+
+    return overrideConfigBuilder;
+  }
+
+ /**
+ * Configures the http client.
+ *
+ * @param conf The Hadoop configuration
+ * @return Http client builder
+ */
+  public static ApacheHttpClient.Builder createHttpClientBuilder(Configuration conf) {
+    ApacheHttpClient.Builder httpClientBuilder =
+        ApacheHttpClient.builder();
+
+    httpClientBuilder.maxConnections(S3AUtils.intOption(conf, MAXIMUM_CONNECTIONS,
+        DEFAULT_MAXIMUM_CONNECTIONS, 1));
+
+    httpClientBuilder.connectionTimeout(Duration.ofSeconds(
+        S3AUtils.intOption(conf, ESTABLISH_TIMEOUT, DEFAULT_ESTABLISH_TIMEOUT, 0)));
Review Comment:
Consider moving the intOption call to a temporary variable.
```suggestion
    int connTimeoutMillis = S3AUtils.intOption(conf, ESTABLISH_TIMEOUT,
        DEFAULT_ESTABLISH_TIMEOUT, 0);
    httpClientBuilder.connectionTimeout(Duration.ofSeconds(connTimeoutMillis));
```
Also applicable on a few other lines, where we have nested method calls in
the args.
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java:
##########
@@ -563,14 +563,16 @@ public ListObjectsRequest newListObjectsV1Request(
final String key,
final String delimiter,
final int maxKeys) {
- ListObjectsRequest request = new ListObjectsRequest()
- .withBucketName(bucket)
- .withMaxKeys(maxKeys)
- .withPrefix(key);
+
+    ListObjectsRequest.Builder requestBuilder =
+        ListObjectsRequest.builder().bucket(bucket).maxKeys(maxKeys).prefix(key);
+
if (delimiter != null) {
- request.setDelimiter(delimiter);
+ requestBuilder.delimiter(delimiter);
}
- return prepareRequest(request);
+
+ //TODO: add call to prepareRequest
+ return requestBuilder.build();
Review Comment:
Should we add a no-op implementation of `<T extends V2Request> T
prepareRequest(T t)` that just returns its argument, and implement it later?
That way, we have the correct final code here that we don't need to revisit,
and we leave a single to-do whose implementation will fix all calls across the
code base.
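Something like this, where `AwsRequest` is the SDK v2 base request class and the method name is illustrative:
```java
// No-op for now: a single place to add v2 request preparation later.
private <T extends AwsRequest> T prepareV2Request(T t) {
  // TODO: port the v1 prepareRequest logic once its v2 shape is decided.
  return t;
}
```
Then this method can end with `return prepareV2Request(requestBuilder.build());` and the single TODO lives on the helper.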
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java:
##########
@@ -1183,12 +1191,26 @@ AmazonS3 getAmazonS3Client() {
* @param reason a justification for requesting access.
* @return AmazonS3Client
*/
+ // TODO: Remove when we remove S3V1 client
@VisibleForTesting
public AmazonS3 getAmazonS3ClientForTesting(String reason) {
LOG.warn("Access to S3A client requested, reason {}", reason);
return s3;
}
+ /**
+ * Returns the S3 client used by this filesystem.
+ * <i>Warning: this must only be used for testing, as it bypasses core
+ * S3A operations. </i>
+ * @param reason a justification for requesting access.
+ * @return S3Client
+ */
+ @VisibleForTesting
+ public S3Client getAmazonS3V2ClientForTesting(String reason) {
+ LOG.warn("Access to S3A client requested, reason {}", reason);
Review Comment:
while we are here...
```suggestion
LOG.warn("Access to S3 client requested, reason {}", reason);
```
##########
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java:
##########
@@ -104,7 +104,7 @@ public interface S3ATestConstants {
* Default path for the multi MB test file: {@value}.
*/
String DEFAULT_CSVTEST_FILE = LANDSAT_BUCKET + "scene_list.gz";
-
+
Review Comment:
Drop this
##########
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java:
##########
@@ -34,8 +35,7 @@
public class MockS3ClientFactory implements S3ClientFactory {
@Override
- public AmazonS3 createS3Client(URI uri,
- final S3ClientCreationParameters parameters) {
+  public AmazonS3 createS3Client(URI uri, final S3ClientCreationParameters parameters) {
Review Comment:
No need to touch this method. We can revert it to reduce the diff for reviewers.
##########
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java:
##########
@@ -170,9 +170,11 @@ private void createFactoryObjects(RequestFactory factory) {
a(factory.newGetObjectRequest(path));
a(factory.newGetObjectMetadataRequest(path));
a(factory.newListMultipartUploadsRequest(path));
- a(factory.newListObjectsV1Request(path, "/", 1));
+      //TODO: Commenting out for now, new request extends AwsRequest, this can be updated
+      // once all request factory operations are updated.
Review Comment:
Similar suggestion to RequestPreparer: add a no-op implementation now and come
back later to add the logic, so no change is needed here later.
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java:
##########
@@ -160,6 +182,84 @@ public AmazonS3 createS3Client(
}
}
+ [... javadoc, createS3ClientV2 signature and builder setup identical to the earlier DefaultS3ClientFactory excerpt ...]
+
+ // add any headers
+    parameters.getHeaders().forEach((h, v) -> clientOverrideConfigBuilder.putHeader(h, v));
+
+ if (parameters.isRequesterPays()) {
+ // All calls must acknowledge requester will pay via header.
+      clientOverrideConfigBuilder.putHeader(REQUESTER_PAYS_HEADER, REQUESTER_PAYS_HEADER_VALUE);
+ }
+
+ if (!StringUtils.isEmpty(parameters.getUserAgentSuffix())) {
+      clientOverrideConfigBuilder.putAdvancedOption(SdkAdvancedClientOption.USER_AGENT_SUFFIX,
+          parameters.getUserAgentSuffix());
+ }
+
+ clientOverrideConfigBuilder.retryPolicy(retryPolicyBuilder.build());
+ httpClientBuilder.proxyConfiguration(proxyConfigBuilder.build());
+
+ s3ClientBuilder.httpClientBuilder(httpClientBuilder)
+ .overrideConfiguration(clientOverrideConfigBuilder.build());
+
+    // use adapter classes so V1 credential providers continue to work. This will be moved to
+    // AWSCredentialProviderList.add() when that class is updated.
+ s3ClientBuilder.credentialsProvider(
+ V1V2AwsCredentialProviderAdapter.adapt(parameters.getCredentialSet()));
+
+ URI endpoint = getS3Endpoint(parameters.getEndpoint(), conf);
+
+ Region region =
+ getS3Region(conf.getTrimmed(AWS_REGION), defaultS3Client);
+
+ LOG.debug("Using endpoint {}; and region {}", endpoint, region);
+
+ s3ClientBuilder.endpointOverride(endpoint).region(region);
+
+    s3ClientBuilder.serviceConfiguration(
+        S3Configuration.builder().pathStyleAccessEnabled(parameters.isPathStyleAccess()).build());
Review Comment:
Move S3Configuration to a temporary variable. There's a bunch of config in
there that we may want to let users leverage in the future, which could then
be one-line changes.
```suggestion
S3Configuration s3Configuration = S3Configuration.builder()
.pathStyleAccessEnabled(parameters.isPathStyleAccess())
.build();
s3ClientBuilder.serviceConfiguration(s3Configuration);
```
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java:
##########
@@ -584,14 +586,17 @@ public ListObjectsV2Request newListObjectsV2Request(
final String key,
final String delimiter,
final int maxKeys) {
- final ListObjectsV2Request request = new ListObjectsV2Request()
- .withBucketName(bucket)
- .withMaxKeys(maxKeys)
- .withPrefix(key);
+
+    final ListObjectsV2Request.Builder requestBuilder =
+        ListObjectsV2Request.builder().bucket(bucket).maxKeys(maxKeys).prefix(key);
Review Comment:
In general, I'd recommend keeping each builder arg on its own line, similar to
how it was previously.
```suggestion
final ListObjectsV2Request.Builder requestBuilder =
ListObjectsV2Request.builder()
.bucket(bucket)
.maxKeys(maxKeys)
.prefix(key);
```
> Upgrade AWS SDK to v2
> ---------------------
>
> Key: HADOOP-18073
> URL: https://issues.apache.org/jira/browse/HADOOP-18073
> Project: Hadoop Common
> Issue Type: Sub-task
> Components: auth, fs/s3
> Affects Versions: 3.3.1
> Reporter: xiaowei sun
> Assignee: Ahmar Suhail
> Priority: Major
> Labels: pull-request-available
> Attachments: Upgrading S3A to SDKV2.pdf
>
>
> This task tracks upgrading Hadoop's AWS connector S3A from AWS SDK for Java
> V1 to AWS SDK for Java V2.
> Original use case:
> {quote}We would like to access s3 with AWS SSO, which is supported in
> software.amazon.awssdk:sdk-core:2.*.
> In particular, from
> [https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/index.html],
> when setting 'fs.s3a.aws.credentials.provider', it must be
> "com.amazonaws.auth.AWSCredentialsProvider". We would like to support
> "software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider" which
> supports AWS SSO, so users only need to authenticate once.
> {quote}