This is an automated email from the ASF dual-hosted git repository.

stevel pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new 203b94414fd HADOOP-19317. S3A: fs.s3a.connection.expect.continue controls 100 CONTINUE behavior (#7134) (#7178)
203b94414fd is described below

commit 203b94414fdc67d1b77006dc05402d373c4f74d1
Author: Steve Loughran <ste...@cloudera.com>
AuthorDate: Mon Nov 25 19:02:59 2024 +0000

    HADOOP-19317. S3A: fs.s3a.connection.expect.continue controls 100 CONTINUE behavior (#7134) (#7178)

    New option: fs.s3a.connection.expect.continue

    This controls whether or not a PUT request to the S3 store sets the
    "Expect: 100-continue" header and awaits a 100 CONTINUE response
    before uploading any data.

    This allows throttling and other problems to be detected quickly.

    The default is "true": the header is sent.

    (This is the SDK v1 backport).

    Contributed by Steve Loughran

    Change-Id: Ic448407268b80d2532c7b7080a21b0fe48694ef5
---
 .../java/org/apache/hadoop/fs/s3a/Constants.java   | 14 +++++++
 .../java/org/apache/hadoop/fs/s3a/S3AUtils.java    |  3 ++
 .../site/markdown/tools/hadoop-aws/connecting.md   | 13 ++++++
 .../fs/contract/s3a/ITestS3AContractCreate.java    | 46 ++++++++++++++++++++++
 .../hadoop/fs/s3a/ITestS3AConfiguration.java       | 19 +++++++++
 .../fs/s3a/scale/ITestS3AHugeFilesNoMultipart.java |  6 +++
 6 files changed, 101 insertions(+)
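A minimal usage sketch (not part of the patch itself): disabling the new option before creating an S3A filesystem client, e.g. for a third-party store without 100-continue support. Only the property name and its default come from this change; the class name and bucket URI are hypothetical, and hadoop-aws plus valid credentials on the classpath are assumed.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class ExpectContinueDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Default is true: PUT requests send "Expect: 100-continue" and
    // wait for the store's interim response before uploading data.
    // Clear the flag for stores which do not implement the handshake.
    conf.setBoolean("fs.s3a.connection.expect.continue", false);
    try (FileSystem fs = FileSystem.get(
        URI.create("s3a://example-bucket/"), conf)) {
      System.out.println("Filesystem created: " + fs.getUri());
    }
  }
}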
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
index 796b0752feb..9e7cef02581 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
@@ -282,6 +282,20 @@ public final class Constants {
       "fs.s3a.connection.request.timeout";
   public static final int DEFAULT_REQUEST_TIMEOUT = 0;
 
+  /**
+   * Should PUT requests await a 100 CONTINUE response before uploading
+   * data?
+   * <p>
+   * Value: {@value}.
+   */
+  public static final String CONNECTION_EXPECT_CONTINUE =
+      "fs.s3a.connection.expect.continue";
+
+  /**
+   * Default value for {@link #CONNECTION_EXPECT_CONTINUE}.
+   */
+  public static final boolean CONNECTION_EXPECT_CONTINUE_DEFAULT = true;
+
   // socket send buffer to be used in Amazon client
   public static final String SOCKET_SEND_BUFFER = "fs.s3a.socket.send.buffer";
   public static final int DEFAULT_SOCKET_SEND_BUFFER = 8 * 1024;
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
index dc0c211fcab..590b0b55ac4 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
@@ -1317,6 +1317,9 @@ public final class S3AUtils {
       LOG.debug("Signer override = {}", signerOverride);
       awsConf.setSignerOverride(signerOverride);
     }
+    boolean expectContinueEnabled = conf.getBoolean(CONNECTION_EXPECT_CONTINUE,
+        CONNECTION_EXPECT_CONTINUE_DEFAULT);
+    awsConf.setUseExpectContinue(expectContinueEnabled);
   }
 
   /**
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md
index f1839a0b203..e41a85aa715 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md
@@ -117,6 +117,19 @@ See [Timeouts](performance.html#timeouts).
   </description>
 </property>
 
+<property>
+  <name>fs.s3a.connection.expect.continue</name>
+  <value>true</value>
+  <description>
+    Should PUT requests await a 100 CONTINUE response before uploading
+    data?
+    This should normally be left alone unless a third-party store which
+    does not support it is encountered, or file uploads over
+    long-distance networks time out.
+    (see HADOOP-19317 as an example)
+  </description>
+</property>
+
 <property>
   <name>fs.s3a.connection.ssl.enabled</name>
   <value>true</value>
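A related sketch: the integration tests below strip per-bucket overrides for this key via removeBaseAndBucketOverrides(), which implies the usual fs.s3a.bucket.<bucket>.* override pattern applies to it. Assuming that, the option can be disabled for a single store; the class and bucket names here are hypothetical.

import org.apache.hadoop.conf.Configuration;

public class PerBucketExpectContinue {

  /**
   * Build a configuration which disables 100-continue for one bucket
   * while leaving the global default (true) in place.
   */
  public static Configuration disableForBucket(String bucket) {
    Configuration conf = new Configuration();
    // Per-bucket override: assumes the standard fs.s3a.bucket.<bucket>.*
    // pattern covers the new key, as the test cleanup code implies.
    conf.setBoolean(
        "fs.s3a.bucket." + bucket + ".connection.expect.continue",
        false);
    return conf;
  }

  public static void main(String[] args) {
    Configuration conf = disableForBucket("thirdparty-store");
    System.out.println(conf.get(
        "fs.s3a.bucket.thirdparty-store.connection.expect.continue"));
  }
}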
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractCreate.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractCreate.java
index d2a858f615e..236ebd05285 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractCreate.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractCreate.java
@@ -18,18 +18,64 @@
 
 package org.apache.hadoop.fs.contract.s3a;
 
+import java.util.Arrays;
+import java.util.Collection;
+
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.contract.AbstractContractCreateTest;
 import org.apache.hadoop.fs.contract.AbstractFSContract;
+import org.apache.hadoop.fs.s3a.S3ATestUtils;
+
+import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_EXPECT_CONTINUE;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
 
 /**
  * S3A contract tests creating files.
  */
+@RunWith(Parameterized.class)
 public class ITestS3AContractCreate extends AbstractContractCreateTest {
 
+  /**
+   * This test suite is parameterized for the different create file
+   * options.
+   * @return a list of test parameters.
+   */
+  @Parameterized.Parameters
+  public static Collection<Object[]> params() {
+    return Arrays.asList(new Object[][]{
+        {false},
+        {true}
+    });
+  }
+
+  /**
+   * Expect a 100-continue response?
+   */
+  private final boolean expectContinue;
+
+  public ITestS3AContractCreate(final boolean expectContinue) {
+    this.expectContinue = expectContinue;
+  }
+
   @Override
   protected AbstractFSContract createContract(Configuration conf) {
     return new S3AContract(conf);
   }
 
+  @Override
+  protected Configuration createConfiguration() {
+    final Configuration conf =
+        super.createConfiguration();
+
+    removeBaseAndBucketOverrides(
+        conf,
+        CONNECTION_EXPECT_CONTINUE);
+    conf.setBoolean(CONNECTION_EXPECT_CONTINUE, expectContinue);
+    S3ATestUtils.disableFilesystemCaching(conf);
+    return conf;
+  }
+
 }
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java
index ff75f6e2613..cf7d40ecfb8 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java
@@ -574,4 +574,23 @@ public class ITestS3AConfiguration {
         .assertEquals(signerOverride, clientConfiguration.getSignerOverride());
   }
 
+  @Test(timeout = 10_000L)
+  public void testExpectContinueFalse() throws Exception {
+    Configuration config = new Configuration(false);
+    config.setBoolean(CONNECTION_EXPECT_CONTINUE, false);
+    ClientConfiguration awsConf = new ClientConfiguration();
+    initConnectionSettings(config, awsConf);
+    Assertions.assertThat(awsConf.isUseExpectContinue())
+        .describedAs("expect continue flag")
+        .isFalse();
+  }
+
+  @Test(timeout = 10_000L)
+  public void testExpectContinueDefault() throws Exception {
+    ClientConfiguration awsConf = new ClientConfiguration();
+    initConnectionSettings(new Configuration(false), awsConf);
+    Assertions.assertThat(awsConf.isUseExpectContinue())
+        .describedAs("expect continue flag")
+        .isTrue();
+  }
 }
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesNoMultipart.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesNoMultipart.java
index ed300dba01e..99b87dd7cb8 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesNoMultipart.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesNoMultipart.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.fs.s3a.S3ATestUtils;
 import org.apache.hadoop.fs.s3a.api.UnsupportedRequestException;
 
 import static org.apache.hadoop.fs.contract.ContractTestUtils.IO_CHUNK_BUFFER_SIZE;
+import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_EXPECT_CONTINUE;
 import static org.apache.hadoop.fs.s3a.Constants.MIN_MULTIPART_THRESHOLD;
 import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_MIN_SIZE;
 import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_SIZE;
@@ -65,17 +66,22 @@ public class ITestS3AHugeFilesNoMultipart extends AbstractSTestS3AHugeFiles {
   /**
    * Create a configuration without multipart upload,
    * and a long request timeout to allow for a very slow
    * PUT in close.
+   * <p>
+   * 100-continue is disabled so as to verify the behavior
+   * on a large PUT.
    * @return the configuration to create the test FS with.
    */
   @Override
   protected Configuration createScaleConfiguration() {
     Configuration conf = super.createScaleConfiguration();
     removeBaseAndBucketOverrides(conf,
+        CONNECTION_EXPECT_CONTINUE,
         IO_CHUNK_BUFFER_SIZE,
         MIN_MULTIPART_THRESHOLD,
         MULTIPART_UPLOADS_ENABLED,
         MULTIPART_SIZE,
         REQUEST_TIMEOUT);
+    conf.setBoolean(CONNECTION_EXPECT_CONTINUE, false);
     conf.setInt(IO_CHUNK_BUFFER_SIZE, 655360);
     conf.set(MIN_MULTIPART_THRESHOLD, S_1T);
     conf.set(MULTIPART_SIZE, S_1T);

---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org