From Hussain Towaileb <[email protected]>:
Hussain Towaileb has uploaded this change for review. (
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17728 )
Change subject: Testing
......................................................................
Testing
Change-Id: Ie38d266cb9877c2de211ec253ef31c435cd29cc5
---
M
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
1 file changed, 44 insertions(+), 0 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/28/17728/1
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
index 7ae992a..c4e3a17 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
@@ -18,6 +18,7 @@
*/
package org.apache.asterix.external.input.record.reader.aws;
+import java.time.Duration;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
@@ -36,6 +37,8 @@
import org.apache.hyracks.api.context.IHyracksTaskContext;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.exceptions.IWarningCollector;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
import software.amazon.awssdk.services.s3.model.S3Object;
@@ -43,6 +46,8 @@
private static final long serialVersionUID = 1L;
+ public static final Logger LOGGER = LogManager.getLogger();
+
@Override
public AsterixInputStream createInputStream(IHyracksTaskContext ctx, int
partition) throws HyracksDataException {
return new AwsS3InputStream(configuration,
partitionWorkLoadsBasedOnSize.get(partition).getFilePaths());
@@ -64,6 +69,36 @@
// TODO(htowaileb): Since we're using the root to load the files then
start filtering, it might end up being
// very expensive since at the root of the prefix we might load
millions of files, we should consider (when
// possible) to get the value and add it
+
+ /**
+ * The following is an experiment to measure the performance of listing
the object keys from S3. Each request lists
+ * 1000 keys at a time. The test will try:
+ * - 10 lists (10k objects)
+ * - 100 lists (100k objects)
+ * - 1000 lists (1m objects)
+ * - 10000 lists (10m objects)
+ * - 100000 lists (100m objects)
+ * - 200000 lists (200m objects)
+ */
+ int[] requests = new int[] { 10, 100, 1000, 10000, 100000, 200000 };
+ for (int tries : requests) {
+ long start = System.nanoTime();
+ LOGGER.info("-------------- {} requests started ------------",
tries);
+ for (int j = 0; j < tries; j++) {
+ long thisRun = System.nanoTime();
+ List<S3Object> filesOnly =
+ S3Utils.listS3Objects(configuration,
includeExcludeMatcher, warningCollector);
+ if (j % 100 == 0) {
+ LOGGER.info("Request {} done: {} seconds", j,
+ Duration.ofNanos(System.nanoTime() -
thisRun).toMillis() / 1000d);
+ }
+ }
+ LOGGER.info("-------------- {} requests ended --------------",
tries);
+ long end = System.nanoTime();
+ LOGGER.info("Total time for {} requests: {} seconds", tries,
+ Duration.ofNanos(end - start).toMillis() / 1000d);
+ }
+
List<S3Object> filesOnly = S3Utils.listS3Objects(configuration,
includeExcludeMatcher, warningCollector);
filesOnly = filterPrefixes(externalDataPrefix, filesOnly,
filterEvaluatorFactory.create(ctx, warningCollector));
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17728
To unsubscribe, or for help writing mail filters, visit
https://asterix-gerrit.ics.uci.edu/settings
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: Ie38d266cb9877c2de211ec253ef31c435cd29cc5
Gerrit-Change-Number: 17728
Gerrit-PatchSet: 1
Gerrit-Owner: Hussain Towaileb <[email protected]>
Gerrit-MessageType: newchange