>From Utsav Singh <[email protected]>:
Utsav Singh has uploaded this change for review. (
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18206 )
Change subject: [NO ISSUE]: Support Reading Single Depth Files/Folder from S3
Container
......................................................................
[NO ISSUE]: Support Reading Single Depth Files/Folder from S3 Container
Change-Id: Iae9d85eb9899419e63c86322cd8da2273adf89c6
---
M
asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
M
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Utils.java
M
asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
3 files changed, 98 insertions(+), 11 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/06/18206/1
diff --git
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
index 55a515c..d426403 100644
---
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
+++
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
@@ -20,6 +20,7 @@
import static
org.apache.asterix.test.external_dataset.avro.AvroFileConverterUtil.AVRO_GEN_BASEDIR;
import static
org.apache.asterix.test.external_dataset.aws.AwsS3ExternalDatasetTest.BOM_FILE_CONTAINER;
+import static
org.apache.asterix.test.external_dataset.aws.AwsS3ExternalDatasetTest.BROWSE_CONTAINER;
import static
org.apache.asterix.test.external_dataset.aws.AwsS3ExternalDatasetTest.DYNAMIC_PREFIX_AT_START_CONTAINER;
import static
org.apache.asterix.test.external_dataset.aws.AwsS3ExternalDatasetTest.FIXED_DATA_CONTAINER;
import static
org.apache.asterix.test.external_dataset.parquet.BinaryFileConverterUtil.BINARY_GEN_BASEDIR;
@@ -78,6 +79,7 @@
private static Uploader fixedDataLoader;
private static Uploader mixedDataLoader;
private static Uploader bomFileLoader;
+ private static Uploader browseDataLoader;
protected TestCaseContext tcCtx;
@@ -148,6 +150,16 @@
ExternalDatasetTestUtils.bomFileLoader = bomFileLoader;
}
+ /**
+  * Stores the given uploaders in the static fields of {@link ExternalDatasetTestUtils} that carry the same
+  * names. This overload additionally accepts the uploader kept in {@code browseDataLoader}.
+  *
+  * @param playgroundDataLoader uploader stored in {@code playgroundDataLoader}
+  * @param dynamicPrefixAtStartDataLoader uploader stored in {@code dynamicPrefixAtStartDataLoader}
+  * @param fixedDataLoader uploader stored in {@code fixedDataLoader}
+  * @param mixedDataLoader uploader stored in {@code mixedDataLoader}
+  * @param bomFileLoader uploader stored in {@code bomFileLoader}
+  * @param browseDataLoader uploader stored in {@code browseDataLoader}
+  */
+ public static void setUploaders(Uploader playgroundDataLoader, Uploader dynamicPrefixAtStartDataLoader,
+         Uploader fixedDataLoader, Uploader mixedDataLoader, Uploader bomFileLoader,
+         Uploader browseDataLoader) {
+     // Parameters shadow the static fields, so every assignment is qualified with the class name.
+     ExternalDatasetTestUtils.browseDataLoader = browseDataLoader;
+     ExternalDatasetTestUtils.bomFileLoader = bomFileLoader;
+     ExternalDatasetTestUtils.mixedDataLoader = mixedDataLoader;
+     ExternalDatasetTestUtils.fixedDataLoader = fixedDataLoader;
+     ExternalDatasetTestUtils.dynamicPrefixAtStartDataLoader = dynamicPrefixAtStartDataLoader;
+     ExternalDatasetTestUtils.playgroundDataLoader = playgroundDataLoader;
+ }
+
/**
* Creates a bucket and fills it with some files for testing purpose.
*/
@@ -183,6 +195,26 @@
LOGGER.info("Files added successfully");
}
+ /**
+  * Uploads a small set of JSON keys through {@code browseDataLoader}, logging before and after.
+  * Each key is uploaded with an empty string as the second argument. The keys form this hierarchy:
+  * <pre>
+  * 1.json
+  * 2.json
+  * level1/3.json
+  * level1/4.json
+  * level1/level2/5.json
+  * level2/level3/6.json
+  * </pre>
+  */
+ public static void prepareBrowseContainer() {
+     // Keys are uploaded in exactly this order.
+     final String[] browseKeys = { "1.json", "2.json", "level1/3.json", "level1/4.json",
+             "level1/level2/5.json", "level2/level3/6.json" };
+     LOGGER.info("Adding JSON files to " + BROWSE_CONTAINER);
+     for (String key : browseKeys) {
+         browseDataLoader.upload(key, "");
+     }
+     LOGGER.info("JSON Files added successfully");
+ }
+
/**
* Special container where dynamic prefix is the first segment
*/
diff --git
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
index 7912d57..1770746 100644
---
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
+++
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
@@ -147,6 +147,7 @@
protected TestCaseContext tcCtx;
public static final String PLAYGROUND_CONTAINER = "playground";
+ public static final String BROWSE_CONTAINER = "playground"; // NOTE(review): same value as PLAYGROUND_CONTAINER; confirm a dedicated container name was not intended
public static final String DYNAMIC_PREFIX_AT_START_CONTAINER =
"dynamic-prefix-at-start-container";
public static final String FIXED_DATA_CONTAINER = "fixed-data"; // Do not
use, has fixed data
public static final String INCLUDE_EXCLUDE_CONTAINER = "include-exclude";
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Utils.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Utils.java
index f028af3..831ca81 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Utils.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Utils.java
@@ -47,10 +47,7 @@
import java.net.URI;
import java.net.URISyntaxException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.Optional;
+import java.util.*;
import java.util.function.BiPredicate;
import java.util.regex.Matcher;
@@ -81,13 +78,8 @@
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.S3ClientBuilder;
-import software.amazon.awssdk.services.s3.model.ListObjectsRequest;
-import software.amazon.awssdk.services.s3.model.ListObjectsResponse;
-import software.amazon.awssdk.services.s3.model.ListObjectsV2Request;
-import software.amazon.awssdk.services.s3.model.ListObjectsV2Response;
-import software.amazon.awssdk.services.s3.model.S3Exception;
-import software.amazon.awssdk.services.s3.model.S3Object;
-import software.amazon.awssdk.services.s3.model.S3Response;
+import software.amazon.awssdk.services.s3.model.*;
+import software.amazon.awssdk.services.s3.paginators.ListObjectsV2Iterable;
public class S3Utils {
private S3Utils() {
@@ -510,4 +502,57 @@
}
}
}
+
+ /**
+  * Builds an S3 client from the given configuration and lists what lies directly under {@code prefix}
+  * in the container (a single level, not recursive).
+  *
+  * @param configuration properties used to build the S3 client
+  * @param container container name
+  * @param prefix key prefix to list under
+  * @return a map with a {@code "files"} entry and a {@code "folders"} entry, each holding names relative
+  *         to {@code prefix}
+  * @throws CompilationException declared by the client construction
+  * @throws HyracksDataException declared by the listing helper
+  */
+ public static Map<String, List<String>> S3ObjectsOfSingleDepth(Map<String, String> configuration,
+         String container, String prefix) throws CompilationException, HyracksDataException {
+     // The client is created here and owned by this call, so it is closed here as well;
+     // previously it was never closed and its resources leaked.
+     try (S3Client s3Client = buildAwsS3Client(configuration)) {
+         return listS3ObjectsOfSingleDepth(s3Client, container, prefix);
+     }
+ }
+
+ /**
+  * Uses the paginated ListObjectsV2 API with {@code "/"} as delimiter to retrieve the objects and the
+  * common prefixes found at a single level under {@code prefix}.
+  *
+  * @param s3Client S3 client
+  * @param container container name
+  * @param prefix definition prefix; {@code null} is handled like an empty prefix when trimming names
+  * @return a map with two entries: {@code "files"} holds the object keys with {@code prefix} stripped
+  *         (the object whose key equals {@code prefix} itself is skipped), and {@code "folders"} holds
+  *         the common prefixes with {@code prefix} and one trailing {@code "/"} stripped
+  */
+ private static Map<String, List<String>> listS3ObjectsOfSingleDepth(S3Client s3Client, String container,
+         String prefix) throws HyracksDataException {
+     // Used only for string trimming; the request still receives the caller's prefix unchanged.
+     String basePrefix = prefix == null ? "" : prefix;
+     ListObjectsV2Request request =
+             ListObjectsV2Request.builder().bucket(container).prefix(prefix).delimiter("/").build();
+     List<String> files = new ArrayList<>();
+     List<String> folders = new ArrayList<>();
+
+     // Walk the pages once and read both lists from every page. Iterating the paginator's
+     // contents() and commonPrefixes() separately would fetch the whole listing twice.
+     ListObjectsV2Iterable pages = s3Client.listObjectsV2Paginator(request);
+     for (ListObjectsV2Response page : pages) {
+         for (S3Object object : page.contents()) {
+             String key = object.key();
+             // Skip the object whose key is the prefix itself (keys are unique, so at most one).
+             if (!key.equals(basePrefix)) {
+                 files.add(key.substring(basePrefix.length()));
+             }
+         }
+         for (CommonPrefix commonPrefix : page.commonPrefixes()) {
+             String folderName = commonPrefix.prefix().substring(basePrefix.length());
+             folders.add(folderName.endsWith("/") ? folderName.substring(0, folderName.length() - 1)
+                     : folderName);
+         }
+     }
+
+     Map<String, List<String>> allObjects = new HashMap<>();
+     allObjects.put("files", files);
+     allObjects.put("folders", folders);
+     return allObjects;
+ }
}
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18206
To unsubscribe, or for help writing mail filters, visit
https://asterix-gerrit.ics.uci.edu/settings
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: Iae9d85eb9899419e63c86322cd8da2273adf89c6
Gerrit-Change-Number: 18206
Gerrit-PatchSet: 1
Gerrit-Owner: Utsav Singh <[email protected]>
Gerrit-MessageType: newchange