This is an automated email from the ASF dual-hosted git repository.

stevel pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new 203b94414fd HADOOP-19317. S3A: fs.s3a.connection.expect.continue controls 100 CONTINUE behavior (#7134) (#7178)
203b94414fd is described below

commit 203b94414fdc67d1b77006dc05402d373c4f74d1
Author: Steve Loughran <ste...@cloudera.com>
AuthorDate: Mon Nov 25 19:02:59 2024 +0000

    HADOOP-19317. S3A: fs.s3a.connection.expect.continue controls 100 CONTINUE behavior (#7134) (#7178)
    
    
    New option
    
      fs.s3a.connection.expect.continue
    
    This controls whether or not a PUT request to the S3 store
    sets the "Expect: 100-continue" header and awaits a 100 CONTINUE
    response before uploading any data.
    
    This allows throttling and other problems to be detected before any data is uploaded.
    
    The default is "true": the header is sent.
    
    (This is the SDK v1 backport).
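    
    A minimal sketch of setting the option from client code; the property
    name and default are those added by this patch, while the class name,
    bucket URI and surrounding code are illustrative assumptions:
    
      import java.net.URI;
      import org.apache.hadoop.conf.Configuration;
      import org.apache.hadoop.fs.FileSystem;
      import org.apache.hadoop.fs.Path;
      import org.apache.hadoop.fs.s3a.Constants;
    
      public class ExpectContinueExample {
        public static void main(String[] args) throws Exception {
          Configuration conf = new Configuration();
          // Skip the "Expect: 100-continue" handshake, for example against
          // a third-party store which does not support it.
          conf.setBoolean(Constants.CONNECTION_EXPECT_CONTINUE, false);
          // Open an uncached filesystem instance against a hypothetical bucket.
          try (FileSystem fs = FileSystem.newInstance(
              URI.create("s3a://example-bucket/"), conf)) {
            fs.getFileStatus(new Path("/"));
          }
        }
      }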
    
    Contributed by Steve Loughran
    
    Change-Id: Ic448407268b80d2532c7b7080a21b0fe48694ef5
---
 .../java/org/apache/hadoop/fs/s3a/Constants.java   | 14 +++++++
 .../java/org/apache/hadoop/fs/s3a/S3AUtils.java    |  3 ++
 .../site/markdown/tools/hadoop-aws/connecting.md   | 13 ++++++
 .../fs/contract/s3a/ITestS3AContractCreate.java    | 46 ++++++++++++++++++++++
 .../hadoop/fs/s3a/ITestS3AConfiguration.java       | 19 +++++++++
 .../fs/s3a/scale/ITestS3AHugeFilesNoMultipart.java |  6 +++
 6 files changed, 101 insertions(+)

diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
index 796b0752feb..9e7cef02581 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
@@ -282,6 +282,20 @@ public final class Constants {
       "fs.s3a.connection.request.timeout";
   public static final int DEFAULT_REQUEST_TIMEOUT = 0;
 
+  /**
+   * Should PUT requests await a 100 CONTINUE response before uploading
+   * data?
+   * <p>
+   * Value: {@value}.
+   */
+  public static final String CONNECTION_EXPECT_CONTINUE =
+      "fs.s3a.connection.expect.continue";
+
+  /**
+   * Default value for {@link #CONNECTION_EXPECT_CONTINUE}.
+   */
+  public static final boolean CONNECTION_EXPECT_CONTINUE_DEFAULT = true;
+
   // socket send buffer to be used in Amazon client
   public static final String SOCKET_SEND_BUFFER = "fs.s3a.socket.send.buffer";
   public static final int DEFAULT_SOCKET_SEND_BUFFER = 8 * 1024;
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
index dc0c211fcab..590b0b55ac4 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
@@ -1317,6 +1317,9 @@ public final class S3AUtils {
      LOG.debug("Signer override = {}", signerOverride);
       awsConf.setSignerOverride(signerOverride);
     }
+    boolean expectContinueEnabled = conf.getBoolean(CONNECTION_EXPECT_CONTINUE,
+            CONNECTION_EXPECT_CONTINUE_DEFAULT);
+    awsConf.setUseExpectContinue(expectContinueEnabled);
   }
 
   /**
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md
index f1839a0b203..e41a85aa715 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md
@@ -117,6 +117,19 @@ See [Timeouts](performance.html#timeouts).
   </description>
 </property>
 
+<property>
+  <name>fs.s3a.connection.expect.continue</name>
+  <value>true</value>
+  <description>
+    Should PUT requests await a 100 CONTINUE response before uploading
+    data?
+    This should normally be left alone unless a third-party store which
+    does not support the header is encountered, or file uploads over
+    long-distance networks time out.
+    (See HADOOP-19317 for an example.)
+  </description>
+</property>
+
 <property>
   <name>fs.s3a.connection.ssl.enabled</name>
   <value>true</value>
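
A sketch of a per-bucket override, assuming S3A's standard per-bucket
configuration mechanism applies to this option as it does to other
fs.s3a. settings; the bucket name "thirdparty" is hypothetical:

<property>
  <name>fs.s3a.bucket.thirdparty.connection.expect.continue</name>
  <value>false</value>
  <description>
    Disable the Expect: 100-continue handshake for the "thirdparty"
    bucket only; all other buckets keep the default.
  </description>
</property>
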
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractCreate.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractCreate.java
index d2a858f615e..236ebd05285 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractCreate.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractCreate.java
@@ -18,18 +18,64 @@
 
 package org.apache.hadoop.fs.contract.s3a;
 
+import java.util.Arrays;
+import java.util.Collection;
+
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.contract.AbstractContractCreateTest;
 import org.apache.hadoop.fs.contract.AbstractFSContract;
+import org.apache.hadoop.fs.s3a.S3ATestUtils;
+
+import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_EXPECT_CONTINUE;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
 
 /**
  * S3A contract tests creating files.
  */
+@RunWith(Parameterized.class)
 public class ITestS3AContractCreate extends AbstractContractCreateTest {
 
+  /**
+   * This test suite is parameterized on the expect-continue setting,
+   * so each test runs with the 100-continue header enabled and disabled.
+   * @return a list of test parameters.
+   */
+  @Parameterized.Parameters
+  public static Collection<Object[]> params() {
+    return Arrays.asList(new Object[][]{
+        {false},
+        {true}
+    });
+  }
+
+  /**
+   * Expect a 100-continue response?
+   */
+  private final boolean expectContinue;
+
+  public ITestS3AContractCreate(final boolean expectContinue) {
+    this.expectContinue = expectContinue;
+  }
+
   @Override
   protected AbstractFSContract createContract(Configuration conf) {
     return new S3AContract(conf);
   }
 
+  @Override
+  protected Configuration createConfiguration() {
+    final Configuration conf =
+        super.createConfiguration();
+
+    removeBaseAndBucketOverrides(
+        conf,
+        CONNECTION_EXPECT_CONTINUE);
+    conf.setBoolean(CONNECTION_EXPECT_CONTINUE, expectContinue);
+    S3ATestUtils.disableFilesystemCaching(conf);
+    return conf;
+  }
+
 }
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java
index ff75f6e2613..cf7d40ecfb8 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java
@@ -574,4 +574,23 @@ public class ITestS3AConfiguration {
         .assertEquals(signerOverride, clientConfiguration.getSignerOverride());
   }
 
+  @Test(timeout = 10_000L)
+  public void testExpectContinueFalse() throws Exception {
+    Configuration config = new Configuration(false);
+    config.setBoolean(CONNECTION_EXPECT_CONTINUE, false);
+    ClientConfiguration awsConf = new ClientConfiguration();
+    initConnectionSettings(config, awsConf);
+    Assertions.assertThat(awsConf.isUseExpectContinue())
+        .describedAs("expect continue flag")
+        .isFalse();
+  }
+
+  @Test(timeout = 10_000L)
+  public void testExpectContinueDefault() throws Exception {
+    ClientConfiguration awsConf = new ClientConfiguration();
+    initConnectionSettings(new Configuration(false), awsConf);
+    Assertions.assertThat(awsConf.isUseExpectContinue())
+        .describedAs("expect continue flag")
+        .isTrue();
+  }
 }
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesNoMultipart.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesNoMultipart.java
index ed300dba01e..99b87dd7cb8 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesNoMultipart.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesNoMultipart.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.fs.s3a.S3ATestUtils;
 import org.apache.hadoop.fs.s3a.api.UnsupportedRequestException;
 
 import static org.apache.hadoop.fs.contract.ContractTestUtils.IO_CHUNK_BUFFER_SIZE;
+import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_EXPECT_CONTINUE;
 import static org.apache.hadoop.fs.s3a.Constants.MIN_MULTIPART_THRESHOLD;
 import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_MIN_SIZE;
 import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_SIZE;
@@ -65,17 +66,22 @@ public class ITestS3AHugeFilesNoMultipart extends AbstractSTestS3AHugeFiles {
    * Create a configuration without multipart upload,
    * and a long request timeout to allow for a very slow
    * PUT in close.
+   * <p>
+   * 100-continue is disabled so as to verify the behavior
+   * on a large PUT.
    * @return the configuration to create the test FS with.
    */
   @Override
   protected Configuration createScaleConfiguration() {
     Configuration conf = super.createScaleConfiguration();
     removeBaseAndBucketOverrides(conf,
+        CONNECTION_EXPECT_CONTINUE,
         IO_CHUNK_BUFFER_SIZE,
         MIN_MULTIPART_THRESHOLD,
         MULTIPART_UPLOADS_ENABLED,
         MULTIPART_SIZE,
         REQUEST_TIMEOUT);
+    conf.setBoolean(CONNECTION_EXPECT_CONTINUE, false);
     conf.setInt(IO_CHUNK_BUFFER_SIZE, 655360);
     conf.set(MIN_MULTIPART_THRESHOLD, S_1T);
     conf.set(MULTIPART_SIZE, S_1T);

