From Hussain Towaileb <[email protected]>:

Hussain Towaileb has uploaded this change for review. ( 
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17728 )


Change subject: Testing
......................................................................

Testing

Change-Id: Ie38d266cb9877c2de211ec253ef31c435cd29cc5
---
M 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
1 file changed, 44 insertions(+), 0 deletions(-)



  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb 
refs/changes/28/17728/1

diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
index 7ae992a..c4e3a17 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
@@ -18,6 +18,7 @@
  */
 package org.apache.asterix.external.input.record.reader.aws;

+import java.time.Duration;
 import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.List;
@@ -36,6 +37,8 @@
 import org.apache.hyracks.api.context.IHyracksTaskContext;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.api.exceptions.IWarningCollector;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;

 import software.amazon.awssdk.services.s3.model.S3Object;

@@ -43,6 +46,8 @@

     private static final long serialVersionUID = 1L;

+    public static final Logger LOGGER = LogManager.getLogger();
+
     @Override
     public AsterixInputStream createInputStream(IHyracksTaskContext ctx, int 
partition) throws HyracksDataException {
         return new AwsS3InputStream(configuration, 
partitionWorkLoadsBasedOnSize.get(partition).getFilePaths());
@@ -64,6 +69,36 @@
         // TODO(htowaileb): Since we're using the root to load the files then 
start filtering, it might end up being
         // very expensive since at the root of the prefix we might load 
millions of files, we should consider (when
         // possible) to get the value and add it
+
+        /**
+         * The following is an experiment to see the performance of listing 
the object keys from S3, each request lists
+         * 1000 keys at a time. The test will try:
+         * - 10 lists (10k objects)
+         * - 100 lists (100k objects)
+         * - 1000 lists (1m objects)
+         * - 10000 lists (10m objects)
+         * - 100000 lists (100m objects)
+         * - 200000 lists (200m objects)
+         */
+        int[] requests = new int[] { 10, 100, 1000, 10000, 100000, 200000 };
+        for (int tries : requests) {
+            long start = System.nanoTime();
+            LOGGER.info("-------------- {} requests started ------------", 
tries);
+            for (int j = 0; j < tries; j++) {
+                long thisRun = System.nanoTime();
+                List<S3Object> filesOnly =
+                        S3Utils.listS3Objects(configuration, 
includeExcludeMatcher, warningCollector);
+                if (j % 100 == 0) {
+                    LOGGER.info("Request {} done: {} seconds", j,
+                            Duration.ofNanos(System.nanoTime() - 
thisRun).toMillis() / 1000d);
+                }
+            }
+            LOGGER.info("-------------- {} requests ended --------------", 
tries);
+            long end = System.nanoTime();
+            LOGGER.info("Total time for {} requests: {} seconds", tries,
+                    Duration.ofNanos(end - start).toMillis() / 1000d);
+        }
+
         List<S3Object> filesOnly = S3Utils.listS3Objects(configuration, 
includeExcludeMatcher, warningCollector);
         filesOnly = filterPrefixes(externalDataPrefix, filesOnly, 
filterEvaluatorFactory.create(ctx, warningCollector));


--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17728
To unsubscribe, or for help writing mail filters, visit 
https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: Ie38d266cb9877c2de211ec253ef31c435cd29cc5
Gerrit-Change-Number: 17728
Gerrit-PatchSet: 1
Gerrit-Owner: Hussain Towaileb <[email protected]>
Gerrit-MessageType: newchange

Reply via email to