Repository: hadoop
Updated Branches:
  refs/heads/branch-2.8 6b1f86f7f -> 4d9001456
HADOOP-12963 Allow using path style addressing for accessing the s3 endpoint. (Stephen Montgomery via stevel)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/4d900145
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/4d900145
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/4d900145

Branch: refs/heads/branch-2.8
Commit: 4d90014560c60e70fb8037d46656e969e8612e19
Parents: 6b1f86f
Author: Steve Loughran <ste...@apache.org>
Authored: Thu Apr 14 12:44:55 2016 +0100
Committer: Steve Loughran <ste...@apache.org>
Committed: Thu Apr 14 12:45:26 2016 +0100

----------------------------------------------------------------------
 .../src/main/resources/core-default.xml         |  7 +++
 .../org/apache/hadoop/fs/s3a/Constants.java     | 10 +++--
 .../org/apache/hadoop/fs/s3a/S3AFileSystem.java | 10 +++++
 .../src/site/markdown/tools/hadoop-aws/index.md |  7 +++
 .../hadoop/fs/s3a/TestS3AConfiguration.java     | 47 ++++++++++++++++++--
 5 files changed, 75 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/4d900145/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
index 75e51c6..83dd37d 100644
--- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
+++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
@@ -741,6 +741,13 @@
 </property>
 
 <property>
+  <name>fs.s3a.path.style.access</name>
+  <description>Enable S3 path style access, i.e. disabling the default virtual hosting behaviour.
+    Useful for S3A-compliant storage providers as it removes the need to set up DNS for virtual hosting.
+  </description>
+</property>
+
+<property>
   <name>fs.s3a.proxy.host</name>
   <description>Hostname of the (optional) proxy server for S3 connections.</description>
 </property>
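
As a usage sketch (not part of the patch): with the new property, pointing S3A at an S3-compatible store reduces to plain configuration. The endpoint, bucket name and credentials below are illustrative placeholders.

    import java.net.URI;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class PathStyleAccessDemo {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Placeholder endpoint of an S3-compatible store.
        conf.set("fs.s3a.endpoint", "s3.example.internal");
        // Placeholder credentials; any supported credential mechanism works.
        conf.set("fs.s3a.access.key", "ACCESS-KEY");
        conf.set("fs.s3a.secret.key", "SECRET-KEY");
        // Ask for path style URLs (https://host/bucket/key) so that no
        // per-bucket DNS entry (bucket.host) has to exist.
        conf.setBoolean("fs.s3a.path.style.access", true);

        FileSystem fs = FileSystem.get(URI.create("s3a://demo-bucket/"), conf);
        System.out.println(fs.exists(new Path("/")));
      }
    }
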

http://git-wip-us.apache.org/repos/asf/hadoop/blob/4d900145/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
index faa760c..a170747 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
@@ -28,13 +28,17 @@ public class Constants {
   // number of simultaneous connections to s3
   public static final String MAXIMUM_CONNECTIONS = "fs.s3a.connection.maximum";
   public static final int DEFAULT_MAXIMUM_CONNECTIONS = 15;
-  
+
   // connect to s3 over ssl?
   public static final String SECURE_CONNECTIONS = "fs.s3a.connection.ssl.enabled";
   public static final boolean DEFAULT_SECURE_CONNECTIONS = true;
 
   //use a custom endpoint?
   public static final String ENDPOINT = "fs.s3a.endpoint";
+
+  //Enable path style access? Overrides default virtual hosting
+  public static final String PATH_STYLE_ACCESS = "fs.s3a.path.style.access";
+
   //connect to s3 through a proxy server?
   public static final String PROXY_HOST = "fs.s3a.proxy.host";
   public static final String PROXY_PORT = "fs.s3a.proxy.port";
@@ -50,7 +54,7 @@ public class Constants {
   // seconds until we give up trying to establish a connection to s3
   public static final String ESTABLISH_TIMEOUT = "fs.s3a.connection.establish.timeout";
   public static final int DEFAULT_ESTABLISH_TIMEOUT = 50000;
-  
+
   // seconds until we give up on a connection to s3
   public static final String SOCKET_TIMEOUT = "fs.s3a.connection.timeout";
   public static final int DEFAULT_SOCKET_TIMEOUT = 200000;
@@ -79,7 +83,7 @@ public class Constants {
   // size of each of or multipart pieces in bytes
   public static final String MULTIPART_SIZE = "fs.s3a.multipart.size";
   public static final long DEFAULT_MULTIPART_SIZE = 104857600; // 100 MB
-  
+
   // minimum size in bytes before we start a multipart uploads or copy
   public static final String MIN_MULTIPART_THRESHOLD = "fs.s3a.multipart.threshold";
   public static final long DEFAULT_MIN_MULTIPART_THRESHOLD = Integer.MAX_VALUE;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/4d900145/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index 23d17fb..83d1d53 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -41,6 +41,7 @@ import com.amazonaws.auth.AWSCredentialsProviderChain;
 import com.amazonaws.auth.InstanceProfileCredentialsProvider;
 import com.amazonaws.services.s3.AmazonS3Client;
+import com.amazonaws.services.s3.S3ClientOptions;
 import com.amazonaws.services.s3.model.CannedAccessControlList;
 import com.amazonaws.services.s3.model.DeleteObjectRequest;
 import com.amazonaws.services.s3.model.DeleteObjectsRequest;
@@ -302,6 +303,15 @@ public class S3AFileSystem extends FileSystem {
         throw new IllegalArgumentException(msg, e);
       }
     }
+    enablePathStyleAccessIfRequired(conf);
+  }
+
+  private void enablePathStyleAccessIfRequired(Configuration conf) {
+    final boolean pathStyleAccess = conf.getBoolean(PATH_STYLE_ACCESS, false);
+    if (pathStyleAccess) {
+      LOG.debug("Enabling path style access!");
+      s3.setS3ClientOptions(new S3ClientOptions().withPathStyleAccess(true));
+    }
   }
 
   private void initTransferManager() {
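
The new method is a thin wrapper over the stock AWS SDK 1.x switch for addressing style. A standalone sketch of the same call against a raw client, with the two URL shapes it toggles between spelled out in comments (bucket and key are placeholders):

    import com.amazonaws.services.s3.AmazonS3Client;
    import com.amazonaws.services.s3.S3ClientOptions;

    public class S3ClientOptionsDemo {
      public static void main(String[] args) {
        // Credentials come from the default provider chain here.
        AmazonS3Client client = new AmazonS3Client();
        // Default, virtual-hosted addressing puts the bucket in the hostname:
        //   https://demo-bucket.s3.amazonaws.com/dir/file.txt
        // Path style addressing puts it in the path instead:
        //   https://s3.amazonaws.com/demo-bucket/dir/file.txt
        client.setS3ClientOptions(new S3ClientOptions().withPathStyleAccess(true));
      }
    }
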

http://git-wip-us.apache.org/repos/asf/hadoop/blob/4d900145/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
index 6c3f31e..15b9837 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
@@ -222,6 +222,13 @@ this capability.
     </property>
 
     <property>
+      <name>fs.s3a.path.style.access</name>
+      <description>Enable S3 path style access, i.e. disabling the default virtual hosting behaviour.
+        Useful for S3A-compliant storage providers as it removes the need to set up DNS for virtual hosting.
+      </description>
+    </property>
+
+    <property>
       <name>fs.s3a.proxy.host</name>
       <description>Hostname of the (optional) proxy server for S3 connections.</description>
     </property>

http://git-wip-us.apache.org/repos/asf/hadoop/blob/4d900145/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AConfiguration.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AConfiguration.java
index e74ebca..264ee1f 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AConfiguration.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AConfiguration.java
@@ -19,10 +19,14 @@
 package org.apache.hadoop.fs.s3a;
 
 import com.amazonaws.services.s3.AmazonS3Client;
+import com.amazonaws.services.s3.S3ClientOptions;
+import com.amazonaws.services.s3.model.AmazonS3Exception;
+
 import org.apache.commons.lang.StringUtils;
 import com.amazonaws.AmazonClientException;
 import org.apache.hadoop.conf.Configuration;
-
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.Timeout;
@@ -30,17 +34,19 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.fail;
 
 import java.io.File;
 import java.net.URI;
-import java.io.IOException;
+import java.lang.reflect.Field;
 
 import org.apache.hadoop.security.ProviderUtils;
 import org.apache.hadoop.security.alias.CredentialProvider;
 import org.apache.hadoop.security.alias.CredentialProviderFactory;
-
+import org.apache.http.HttpStatus;
 import org.junit.rules.TemporaryFolder;
 
 public class TestS3AConfiguration {
@@ -352,4 +358,39 @@ public class TestS3AConfiguration {
     assertEquals("SecretKey incorrect.", "456", creds.getAccessSecret());
 
   }
+
+  @Test
+  public void shouldBeAbleToSwitchOnS3PathStyleAccessViaConfigProperty() throws Exception {
+
+    conf = new Configuration();
+    conf.set(Constants.PATH_STYLE_ACCESS, Boolean.toString(true));
+    assertTrue(conf.getBoolean(Constants.PATH_STYLE_ACCESS, false));
+
+    try {
+      fs = S3ATestUtils.createTestFileSystem(conf);
+      final Object object = getClientOptionsField(fs.getAmazonS3Client(), "clientOptions");
+      assertNotNull(object);
+      assertTrue("Unexpected type found for clientOptions!", object instanceof S3ClientOptions);
+      assertTrue("Expected to find path style access to be switched on!", ((S3ClientOptions) object).isPathStyleAccess());
+      byte[] file = ContractTestUtils.toAsciiByteArray("test file");
+      ContractTestUtils.writeAndRead(fs, new Path("/path/style/access/testFile"), file, file.length, conf.getInt(Constants.FS_S3A_BLOCK_SIZE, file.length), false, true);
+    } catch (final AmazonS3Exception e) {
+      LOG.error("Caught exception: ", e);
+      // Catch/pass standard path style access behaviour when live bucket
+      // isn't in the same region as the s3 client default. See
+      // http://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html
+      assertEquals(HttpStatus.SC_MOVED_PERMANENTLY, e.getStatusCode());
+    }
+  }
+
+  private Object getClientOptionsField(AmazonS3Client s3client, String field)
+      throws NoSuchFieldException, IllegalAccessException {
+    final Field clientOptionsProps = s3client.getClass().getDeclaredField(field);
+    assertNotNull(clientOptionsProps);
+    if (!clientOptionsProps.isAccessible()) {
+      clientOptionsProps.setAccessible(true);
+    }
+    final Object object = clientOptionsProps.get(s3client);
+    return object;
+  }
 }
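
One note on the catch block in the test above: with path style addressing the bucket name sits in the request path rather than the hostname, so a request sent to an endpoint in the wrong region cannot be redirected through DNS; S3 answers 301 Moved Permanently instead, which the test accepts as a pass. A minimal sketch of that failure mode, with a placeholder bucket homed in another region:

    import com.amazonaws.services.s3.AmazonS3Client;
    import com.amazonaws.services.s3.S3ClientOptions;
    import com.amazonaws.services.s3.model.AmazonS3Exception;

    import org.apache.http.HttpStatus;

    public class PathStyleRedirectDemo {
      public static void main(String[] args) {
        AmazonS3Client client = new AmazonS3Client();
        client.setS3ClientOptions(new S3ClientOptions().withPathStyleAccess(true));
        try {
          // Path style request to the default (us-east-1) endpoint:
          //   https://s3.amazonaws.com/eu-homed-bucket/some/key
          // If the bucket actually lives in another region, S3 replies with
          // 301 rather than serving the object.
          client.getObjectMetadata("eu-homed-bucket", "some/key");
        } catch (AmazonS3Exception e) {
          if (e.getStatusCode() == HttpStatus.SC_MOVED_PERMANENTLY) {
            System.out.println("Bucket is homed in a different region.");
          }
        }
      }
    }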