[ 
https://issues.apache.org/jira/browse/HADOOP-19740?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=18038996#comment-18038996
 ] 

ASF GitHub Bot commented on HADOOP-19740:
-----------------------------------------

mukund-thakur commented on code in PR #8058:
URL: https://github.com/apache/hadoop/pull/8058#discussion_r2535742542


##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RegionResolution.java:
##########
@@ -0,0 +1,535 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.impl;
+
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.Locale;
+import java.util.Optional;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import javax.annotation.Nullable;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.awscore.util.AwsHostNameUtils;
+import software.amazon.awssdk.regions.Region;
+import software.amazon.awssdk.regions.providers.InstanceProfileRegionProvider;
+
+import org.apache.hadoop.classification.VisibleForTesting;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.s3a.Invoker;
+import org.apache.hadoop.fs.s3a.Retries;
+import org.apache.hadoop.fs.s3a.S3ClientFactory;
+
+import static java.util.Objects.requireNonNull;
+import static 
org.apache.hadoop.fs.s3a.Constants.AWS_S3_CROSS_REGION_ACCESS_ENABLED;
+import static 
org.apache.hadoop.fs.s3a.Constants.AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT;
+import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT;
+import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_SECURE_CONNECTIONS;
+import static org.apache.hadoop.fs.s3a.Constants.EC2_REGION;
+import static org.apache.hadoop.fs.s3a.Constants.EMPTY_REGION;
+import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT;
+import static org.apache.hadoop.fs.s3a.Constants.SDK_REGION;
+import static org.apache.hadoop.fs.s3a.Constants.SECURE_CONNECTIONS;
+import static org.apache.hadoop.util.Preconditions.checkArgument;
+import static software.amazon.awssdk.regions.Region.US_EAST_2;
+
+/**
+ * Region resolution.
+ * <p>This is complicated and can be a source of support escalations.
+ * <p>The V1 SDK was happy to take an endpoint and
+ * work details out from there, possibly probing us-central-1 and caching
+ * the result.
+ * <p>The V2 SDK likes the signing region and endpoint to be declared.
+ * The S3A connector has tried to mimic the V1 code, but lacks some features
+ * (use of environment variables, probing of EC2 IAM details) for which
+ * the SDK is better.
+ *
+ */
+public class RegionResolution {
+
+  protected static final Logger LOG =
+      LoggerFactory.getLogger(RegionResolution.class);
+
+  /**
+   * Service to ask SDK to parse.
+   */
+  private static final String S3_SERVICE_NAME = "s3";
+
+  /**
+   * Pattern to match vpce endpoints on.
+   */
+  private static final Pattern VPC_ENDPOINT_PATTERN =
+      
Pattern.compile("^(?:.+\\.)?([a-z0-9-]+)\\.vpce\\.amazonaws\\.(?:com|com\\.cn)$");
+
+  /**
+   * Error message when an endpoint is set with FIPS enabled: {@value}.
+   */
+  @VisibleForTesting
+  public static final String ERROR_ENDPOINT_WITH_FIPS =
+      "Only S3 central endpoint cannot be set when " + FIPS_ENDPOINT + " is 
true";
+
+  /**
+   * Virtual hostnames MUST be used when using the FIPS endpoint.
+   */
+  public static final String FIPS_PATH_ACCESS_INCOMPATIBLE =
+      "Path style access must be disabled when " + FIPS_ENDPOINT + " is true";
+
+  /**
+   * String value for external region: {@value}.
+   */
+  public static final String EXTERNAL = "external";
+
+  /**
+   * External region, used for third party endpoints.
+   */
+  public static final Region EXTERNAL_REGION = Region.of(EXTERNAL);
+
+  /**
+   * How was the region resolved?
+   */
+  public enum RegionResolutionMechanism {
+
+    CalculatedFromEndpoint("Calculated from endpoint"),
+    ExternalEndpoint("External endpoint"),
+    FallbackToCentral("Fallback to central endpoint"),
+    ParseVpceEndpoint("Parse VPCE Endpoint"),
+    Ec2Metadata("EC2 Metadata"),
+    Sdk("SDK resolution chain"),
+    Specified("region specified");
+
+    /**
+     * Text of the mechanism.
+     */
+    private final String mechanism;
+
+    RegionResolutionMechanism(String mechanism) {
+      this.mechanism = mechanism;
+    }
+
+    /**
+     * String value of the resolution mechanism.
+     * @return the resolution mechanism.
+     */
+    public String getMechanism() {
+      return mechanism;
+    }
+
+    @Override
+    public String toString() {
+      final StringBuilder sb = new StringBuilder("RegionResolutionMechanism{");
+      sb.append("mechanism='").append(mechanism).append('\'');
+      sb.append('}');
+      return sb.toString();
+    }
+  }
+
+  /**
+   * The resolution of a region and endpoint.
+   */
+  public static final class Resolution {
+
+    /**
+     * Region: if null hand down to the SDK.
+     */
+    private Region region;
+
+    /**
+     * How was the region resolved?
+     * Null means unresolved.
+     */
+    private RegionResolutionMechanism mechanism;
+
+    /**
+     * Should FIPS be enabled?
+     */
+    private boolean useFips;
+
+    /**
+     * Should cross-region access be enabled?
+     */
+    private boolean crossRegionAccessEnabled;
+
+    /**
+     * Endpoint as string.
+     */
+    private String endpointStr;
+
+    /**
+     * Endpoint URI.
+     */
+    private URI endpointUri;
+
+    /**
+     * Use the central endpoint?
+     */
+    private boolean useCentralEndpoint;
+
+    public Resolution() {
+    }
+
+    /**
+     * Instantiate with a region and resolution mechanism.
+     * @param region region
+     * @param mechanism resolution mechanism.
+     */
+    public Resolution(final Region region, final RegionResolutionMechanism 
mechanism) {
+      this.region = region;
+      this.mechanism = mechanism;
+    }
+
+    /**
+     * Set the region.
+     * Declares the region as resolved even when the value is null (i.e. 
resolve to SDK).
+     * @param region region
+     * @param resolutionMechanism resolution mechanism.
+     * @return the builder
+     */
+    public Resolution withRegion(
+        @Nullable final Region region,
+        final RegionResolutionMechanism resolutionMechanism) {
+      this.region = region;
+      this.mechanism = requireNonNull(resolutionMechanism);
+      return this;
+    }
+
+    /**
+     * Set builder value.
+     * @param value new value
+     * @return the builder
+     */
+    public Resolution withUseFips(final boolean value) {
+      useFips = value;
+      return this;
+    }
+
+    /**
+     * Set builder value.
+     * @param value new value
+     * @return the builder
+     */
+    public Resolution withCrossRegionAccessEnabled(final boolean value) {
+      crossRegionAccessEnabled = value;
+      return this;
+    }
+
+    /**
+     * Set builder value.
+     * @param value new value
+     * @return the builder
+     */
+    public Resolution withEndpointStr(final String value) {
+      endpointStr = value;
+      return this;
+    }
+
+    public URI getEndpointUri() {
+      return endpointUri;
+    }
+
+    /**
+     * Set builder value.
+     * @param value new value
+     * @return the builder
+     */
+    public Resolution withEndpointUri(final URI value) {
+      endpointUri = value;
+      return this;
+    }
+
+    public Region getRegion() {
+      return region;
+    }
+
+    public boolean isUseFips() {
+      return useFips;
+    }
+
+    public boolean isCrossRegionAccessEnabled() {
+      return crossRegionAccessEnabled;
+    }
+
+    public RegionResolutionMechanism getMechanism() {
+      return mechanism;
+    }
+
+    public String getEndpointStr() {
+      return endpointStr;
+    }
+
+    public boolean isRegionResolved() {
+      return mechanism != null;
+    }
+
+    public boolean isUseCentralEndpoint() {
+      return useCentralEndpoint;
+    }
+
+    /**
+     * Set builder value.
+     * @param value new value
+     * @return the builder
+     */
+    public Resolution withUseCentralEndpoint(final boolean value) {
+      useCentralEndpoint = value;
+      return this;
+    }
+
+    @Override
+    public String toString() {
+      final StringBuilder sb = new StringBuilder("Resolution{");
+      sb.append("region=").append(region);
+      sb.append(", resolution=").append(mechanism);
+      sb.append(", useFips=").append(useFips);
+      sb.append(", 
crossRegionAccessEnabled=").append(crossRegionAccessEnabled);
+      sb.append(", endpointUri=").append(endpointUri);
+      sb.append(", useCentralEndpoint=").append(useCentralEndpoint);
+      sb.append('}');
+      return sb.toString();
+    }
+  }
+
+  /**
+   * Given an endpoint string, create the endpoint URI.
+   * @param endpoint possibly null endpoint.
+   * @param secureConnections use secure HTTPS connection?
+   * @return an endpoint URI, or null if the endpoint passed in was null/empty
+   * @throws IllegalArgumentException failure to parse the endpoint.
+   */
+  public static URI buildEndpointUri(String endpoint, final boolean 
secureConnections) {
+
+    String protocol = secureConnections ? "https" : "http";
+
+    if (endpoint == null || endpoint.isEmpty()) {
+      // don't set an endpoint if none is configured, instead let the SDK 
figure it out.
+      return null;
+    }
+
+    if (!endpoint.contains("://")) {
+      endpoint = String.format("%s://%s", protocol, endpoint);
+    }
+
+    try {
+      return new URI(endpoint);
+    } catch (URISyntaxException e) {
+      throw new IllegalArgumentException(e);
+    }
+  }
+
+  /**
+   * Parses the endpoint to get the region.
+   * If endpoint is the central one, use US_EAST_2.
+   * @param endpoint the configured endpoint.
+   * @param endpointEndsWithCentral true if the endpoint is configured as 
central.
+   * @return the S3 region resolution if possible from parsing the endpoint
+   */
+  @VisibleForTesting
+  public static Optional<Resolution> getS3RegionFromEndpoint(
+      final String endpoint,
+      final boolean endpointEndsWithCentral) {
+
+    if (!endpointEndsWithCentral) {
+      // S3 VPC endpoint parsing
+      Matcher matcher = VPC_ENDPOINT_PATTERN.matcher(endpoint);
+      if (matcher.find()) {
+        LOG.debug("Mapping to VPCE");
+        LOG.debug("Endpoint {} is VPC endpoint; parsing region as {}",
+            endpoint, matcher.group(1));
+        return Optional.of(new Resolution(
+            Region.of(matcher.group(1)),
+            RegionResolutionMechanism.ParseVpceEndpoint));
+      }
+
+      LOG.debug("Endpoint {} is not the default; parsing", endpoint);
+      return AwsHostNameUtils.parseSigningRegion(endpoint, S3_SERVICE_NAME)
+          .map(r ->
+              new Resolution(r, 
RegionResolutionMechanism.CalculatedFromEndpoint));
+    }
+
+    // No resolution.
+    return Optional.empty();
+  }
+
+  /**
+   * Is this an AWS endpoint, that is: has an endpoint been set which matches
+   * amazon.
+   * @param endpoint non-null endpoint URL
+   * @return true if this is amazonaws or amazonaws china
+   */
+  public static boolean isAwsEndpoint(final String endpoint) {
+    final String h = endpoint.toLowerCase(Locale.ROOT);
+    // Common AWS partitions: global (.amazonaws.com) and China 
(.amazonaws.com.cn).
+    return h.endsWith(".amazonaws.com")
+        || h.endsWith(".amazonaws.com.cn");
+  }
+
+
+  /**
+   * Does the region name refer to an SDK region?
+   * @param configuredRegion region in the configuration
+   * @return true if this is considered to refer to an SDK region.
+   */
+  public static boolean isSdkRegion(String configuredRegion) {
+    return SDK_REGION.equalsIgnoreCase(configuredRegion)
+        || EMPTY_REGION.equalsIgnoreCase(configuredRegion);
+  }
+
+  /**
+   * Does the region name refer to {@code "ec2"} in which case special handling
+   * is required.
+   * @param configuredRegion region in the configuration
+   * @return true if this is considered to refer to the EC2 region.
+   */
+  public static boolean isEc2Region(String configuredRegion) {
+    return EC2_REGION.equalsIgnoreCase(configuredRegion);
+  }
+
+  /**
+   * Calculate the region and the final endpoint.
+   * @param parameters creation parameters
+   * @param conf configuration with other options.
+   * @return the resolved region and endpoint.
+   * @throws IOException if the client failed to communicate with the IAM 
service.
+   * @throws IllegalArgumentException failure to parse endpoint, or FIPS 
settings.
+   */
+  @Retries.OnceTranslated
+  public static Resolution calculateRegion(
+      final S3ClientFactory.S3ClientCreationParameters parameters,
+      final Configuration conf) throws IOException {
+
+    Resolution resolution = new Resolution();
+
+    // endpoint; may be null
+    final String endpointStr = parameters.getEndpoint();
+    boolean endpointDeclared = endpointStr != null && !endpointStr.isEmpty();
+    // will be null if endpointStr is null/empty
+    final URI endpoint = buildEndpointUri(endpointStr,
+        conf.getBoolean(SECURE_CONNECTIONS, DEFAULT_SECURE_CONNECTIONS));
+
+    final String configuredRegion = parameters.getRegion();
+
+    // If the region was configured, set it.
+    // this includes special handling of the sdk, ec2 and "" regions.
+    if (configuredRegion != null) {

Review Comment:
   Where is the else branch that handles the case where the region is not configured?





> S3A: add explicit "sdk" and "ec2" regions for region resolution through SDK 
> and EC2 IAM
> ---------------------------------------------------------------------------------------
>
>                 Key: HADOOP-19740
>                 URL: https://issues.apache.org/jira/browse/HADOOP-19740
>             Project: Hadoop Common
>          Issue Type: Improvement
>          Components: fs/s3
>    Affects Versions: 3.4.2
>            Reporter: Steve Loughran
>            Assignee: Steve Loughran
>            Priority: Major
>              Labels: pull-request-available
>
> Add explicit regions to hand off to the sdk
> * sdk: "use the sdk chain"
> * ec2: "we are in EC2, use the local region": use the IAM logic inside the 
> SDK directly.
> An empty string "" also hands off to the SDK; the warning will be removed.
> Also: if an endpoint is set and it is not parsed as a VPCE endpoint, we will 
> automatically set the region name to "external". This avoids the need to make 
> up an external region when working with an endpoint.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to