This is an automated email from the ASF dual-hosted git repository.

wyk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git


The following commit(s) were added to refs/heads/master by this push:
     new 81c3249  [ASTERIXDB-2944][EXT] Ensure the size of S3 connection pool
81c3249 is described below

commit 81c3249322957be261cd99bf7d6b464fcb4a3bbd
Author: Wail Alkowaileet <[email protected]>
AuthorDate: Wed Aug 11 08:44:01 2021 -0700

    [ASTERIXDB-2944][EXT] Ensure the size of S3 connection pool
    
    - user mode changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
    - We set the S3 connection pool size to be the number of partitions.
    
    Change-Id: I1e1ce66cc7cd39cc81d004f90c36871ad31a685f
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/12763
    Integration-Tests: Jenkins <[email protected]>
    Tested-by: Jenkins <[email protected]>
    Reviewed-by: Wael Alkowaileet <[email protected]>
    Reviewed-by: Hussain Towaileb <[email protected]>
---
 .../reader/aws/parquet/AwsS3ParquetReaderFactory.java       |  3 ++-
 .../apache/asterix/external/util/ExternalDataConstants.java | 10 +++++++++-
 .../org/apache/asterix/external/util/ExternalDataUtils.java | 13 ++++++++++---
 3 files changed, 21 insertions(+), 5 deletions(-)

diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/parquet/AwsS3ParquetReaderFactory.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/parquet/AwsS3ParquetReaderFactory.java
index c9eb489..803e657 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/parquet/AwsS3ParquetReaderFactory.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/parquet/AwsS3ParquetReaderFactory.java
@@ -51,7 +51,8 @@ public class AwsS3ParquetReaderFactory extends 
HDFSDataSourceFactory {
 
         //Configure Hadoop S3 input splits
         JobConf conf = createHdfsConf(serviceCtx, configuration);
-        ExternalDataUtils.AwsS3.configureAwsS3HdfsJobConf(conf, configuration);
+        int numberOfPartitions = 
getPartitionConstraint().getLocations().length;
+        ExternalDataUtils.AwsS3.configureAwsS3HdfsJobConf(conf, configuration, 
numberOfPartitions);
         configureHdfsConf(conf, configuration);
     }
 
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
index 51865f0..7445fbd 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
@@ -320,9 +320,17 @@ public class ExternalDataConstants {
         public static final String HADOOP_ACCESS_KEY_ID = "fs.s3a.access.key";
         public static final String HADOOP_SECRET_ACCESS_KEY = 
"fs.s3a.secret.key";
         public static final String HADOOP_SESSION_TOKEN = 
"fs.s3a.session.token";
-        public static final String HADOOP_PATH_STYLE_ACCESS = 
"fs.s3a.path.style.access";
         public static final String HADOOP_REGION = "fs.s3a.region";
         public static final String HADOOP_SERVICE_END_POINT = 
"fs.s3a.endpoint";
+
+        /*
+         * Internal configurations
+         */
+        //Allows accessing directories as a file system path
+        public static final String HADOOP_PATH_STYLE_ACCESS = 
"fs.s3a.path.style.access";
+        //The maximum number of HTTP connections in the connection pool
+        public static final String HADOOP_S3_CONNECTION_POOL_SIZE = 
"fs.s3a.connection.maximum";
+        //The protocol used for S3
         public static final String HADOOP_S3_PROTOCOL = "s3a";
 
         //Hadoop credentials provider key
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
index 750ff13..59f2382 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
@@ -25,6 +25,7 @@ import static 
org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOO
 import static 
org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_ANONYMOUS_ACCESS;
 import static 
org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_CREDENTIAL_PROVIDER_KEY;
 import static 
org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_PATH_STYLE_ACCESS;
+import static 
org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_S3_CONNECTION_POOL_SIZE;
 import static 
org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_SECRET_ACCESS_KEY;
 import static 
org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_SESSION_TOKEN;
 import static 
org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_TEMP_ACCESS;
@@ -121,7 +122,6 @@ import software.amazon.awssdk.services.s3.model.S3Object;
 import software.amazon.awssdk.services.s3.model.S3Response;
 
 public class ExternalDataUtils {
-
     private static final Map<ATypeTag, IValueParserFactory> 
valueParserFactoryMap = new EnumMap<>(ATypeTag.class);
     private static final int DEFAULT_MAX_ARGUMENT_SZ = 1024 * 1024;
     private static final int HEADER_FUDGE = 64;
@@ -833,9 +833,11 @@ public class ExternalDataUtils {
         /**
          * Configures the Hadoop JobConf for S3 access using the provided configuration
          *
-         * @param configuration properties
+         * @param configuration      properties
+         * @param numberOfPartitions number of partitions in the cluster
          */
-        public static void configureAwsS3HdfsJobConf(JobConf conf, Map<String, 
String> configuration) {
+        public static void configureAwsS3HdfsJobConf(JobConf conf, Map<String, 
String> configuration,
+                int numberOfPartitions) {
             String accessKeyId = 
configuration.get(ExternalDataConstants.AwsS3.ACCESS_KEY_ID_FIELD_NAME);
             String secretAccessKey = 
configuration.get(ExternalDataConstants.AwsS3.SECRET_ACCESS_KEY_FIELD_NAME);
             String sessionToken = 
configuration.get(ExternalDataConstants.AwsS3.SESSION_TOKEN_FIELD_NAME);
@@ -867,6 +869,11 @@ public class ExternalDataUtils {
              */
             conf.set(HADOOP_PATH_STYLE_ACCESS, ExternalDataConstants.TRUE);
 
+            /*
+             * Set the size of S3 connection pool to be the number of 
partitions
+             */
+            conf.set(HADOOP_S3_CONNECTION_POOL_SIZE, 
String.valueOf(numberOfPartitions));
+
             if (serviceEndpoint != null) {
                 // Validation of the URL should be done at hadoop-aws level
                 conf.set(ExternalDataConstants.AwsS3.HADOOP_SERVICE_END_POINT, 
serviceEndpoint);

Reply via email to