This is an automated email from the ASF dual-hosted git repository.

mhubail pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git


The following commit(s) were added to refs/heads/master by this push:
     new 381c707930 [NO ISSUE]: Support Reading Single Depth Files/Folder from 
S3 Container
381c707930 is described below

commit 381c7079309572c35eee46142c5fcbcec1151de0
Author: utsavCbase <utsav.si...@couchbase.com>
AuthorDate: Thu Mar 21 13:38:27 2024 +0530

    [NO ISSUE]: Support Reading Single Depth Files/Folder from S3 Container
    
    Change-Id: Iae9d85eb9899419e63c86322cd8da2273adf89c6
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18206
    Reviewed-by: Utsav Singh <utsav.si...@couchbase.com>
    Reviewed-by: Hussain Towaileb <hussai...@gmail.com>
    Integration-Tests: Jenkins <jenk...@fulliautomatix.ics.uci.edu>
    Tested-by: Jenkins <jenk...@fulliautomatix.ics.uci.edu>
---
 .../external_dataset/ExternalDatasetTestUtils.java | 38 +++++++++++++++
 .../aws/AwsS3ExternalDatasetTest.java              |  1 +
 .../asterix/external/util/aws/s3/S3Utils.java      | 57 ++++++++++++++++++++++
 3 files changed, 96 insertions(+)

diff --git 
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
 
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
index 55a515c879..34d209eb7a 100644
--- 
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
+++ 
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
@@ -20,6 +20,7 @@ package org.apache.asterix.test.external_dataset;
 
 import static 
org.apache.asterix.test.external_dataset.avro.AvroFileConverterUtil.AVRO_GEN_BASEDIR;
 import static 
org.apache.asterix.test.external_dataset.aws.AwsS3ExternalDatasetTest.BOM_FILE_CONTAINER;
+import static 
org.apache.asterix.test.external_dataset.aws.AwsS3ExternalDatasetTest.BROWSE_CONTAINER;
 import static 
org.apache.asterix.test.external_dataset.aws.AwsS3ExternalDatasetTest.DYNAMIC_PREFIX_AT_START_CONTAINER;
 import static 
org.apache.asterix.test.external_dataset.aws.AwsS3ExternalDatasetTest.FIXED_DATA_CONTAINER;
 import static 
org.apache.asterix.test.external_dataset.parquet.BinaryFileConverterUtil.BINARY_GEN_BASEDIR;
@@ -78,6 +79,7 @@ public class ExternalDatasetTestUtils {
     private static Uploader fixedDataLoader;
     private static Uploader mixedDataLoader;
     private static Uploader bomFileLoader;
+    private static Uploader browseDataLoader;
 
     protected TestCaseContext tcCtx;
 
@@ -148,6 +150,16 @@ public class ExternalDatasetTestUtils {
         ExternalDatasetTestUtils.bomFileLoader = bomFileLoader;
     }
 
+    /**
+     * Stores the given uploaders in the corresponding static fields of this class so that the container
+     * preparation helpers can use them afterwards.
+     *
+     * @param playgroundDataLoader           uploader kept in {@code playgroundDataLoader}
+     * @param dynamicPrefixAtStartDataLoader uploader kept in {@code dynamicPrefixAtStartDataLoader}
+     * @param fixedDataLoader                uploader kept in {@code fixedDataLoader}
+     * @param mixedDataLoader                uploader kept in {@code mixedDataLoader}
+     * @param bomFileLoader                  uploader kept in {@code bomFileLoader}
+     * @param browseDataLoader               uploader kept in {@code browseDataLoader}
+     */
+    public static void setUploaders(Uploader playgroundDataLoader, Uploader dynamicPrefixAtStartDataLoader,
+            Uploader fixedDataLoader, Uploader mixedDataLoader, Uploader bomFileLoader, Uploader browseDataLoader) {
+        // the assignments are independent of each other; the class qualifier is required because every
+        // parameter shadows the static field of the same name
+        ExternalDatasetTestUtils.browseDataLoader = browseDataLoader;
+        ExternalDatasetTestUtils.bomFileLoader = bomFileLoader;
+        ExternalDatasetTestUtils.mixedDataLoader = mixedDataLoader;
+        ExternalDatasetTestUtils.fixedDataLoader = fixedDataLoader;
+        ExternalDatasetTestUtils.dynamicPrefixAtStartDataLoader = dynamicPrefixAtStartDataLoader;
+        ExternalDatasetTestUtils.playgroundDataLoader = playgroundDataLoader;
+    }
+
     /**
      * Creates a bucket and fills it with some files for testing purpose.
      */
@@ -183,6 +195,32 @@ public class ExternalDatasetTestUtils {
         LOGGER.info("Files added successfully");
     }
 
+    /**
+     * Uploads the JSON fixtures for the browse container through {@code browseDataLoader}.
+     * <p>
+     * File hierarchy inside the browse container:
+     * <pre>
+     * browse/1.json
+     * browse/2.json
+     * browse/level1/3.json
+     * browse/level1/4.json
+     * browse/level1/level2/5.json
+     * browse/level2/level3/6.json
+     * browse/level3/1.json ... browse/level3/1500.json
+     * </pre>
+     * Each file contains one record of the form {@code {"id":N}}.
+     */
+    public static void prepareBrowseContainer() {
+        // TODO(Utsav): add a test for browsing an S3 path which returns multiple folders; skipped for now as
+        // the S3 mock server does not support this.
+        LOGGER.info("Adding JSON files to " + BROWSE_CONTAINER);
+
+        // hand-picked files at different depths; the record id is the 1-based position in this list
+        String[] shallowKeys = { "1.json", "2.json", "level1/3.json", "level1/4.json", "level1/level2/5.json",
+                "level2/level3/6.json" };
+        int recordId = 0;
+        for (String key : shallowKeys) {
+            recordId++;
+            browseDataLoader.upload(key, "{\"id\":" + recordId + "}");
+        }
+
+        // 1000+ files under level3/ (presumably to go past a single listing page -- confirm)
+        int bulkId = 1;
+        while (bulkId <= 1500) {
+            browseDataLoader.upload("level3/" + bulkId + ".json", "{\"id\":" + bulkId + "}");
+            bulkId++;
+        }
+        LOGGER.info("JSON Files added successfully");
+    }
+
     /**
      * Special container where dynamic prefix is the first segment
      */
diff --git 
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
 
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
index 7912d57960..9a2bfbcc7c 100644
--- 
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
+++ 
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
@@ -147,6 +147,7 @@ public class AwsS3ExternalDatasetTest {
     protected TestCaseContext tcCtx;
 
     public static final String PLAYGROUND_CONTAINER = "playground";
+    public static final String BROWSE_CONTAINER = "browse";
     public static final String DYNAMIC_PREFIX_AT_START_CONTAINER = 
"dynamic-prefix-at-start-container";
     public static final String FIXED_DATA_CONTAINER = "fixed-data"; // Do not 
use, has fixed data
     public static final String INCLUDE_EXCLUDE_CONTAINER = "include-exclude";
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Utils.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Utils.java
index 45e83b4216..6a16913258 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Utils.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Utils.java
@@ -48,6 +48,7 @@ import static 
org.apache.hyracks.api.util.ExceptionUtils.getMessageOrToString;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
@@ -81,6 +82,7 @@ import software.amazon.awssdk.core.exception.SdkException;
 import software.amazon.awssdk.regions.Region;
 import software.amazon.awssdk.services.s3.S3Client;
 import software.amazon.awssdk.services.s3.S3ClientBuilder;
+import software.amazon.awssdk.services.s3.model.CommonPrefix;
 import software.amazon.awssdk.services.s3.model.ListObjectsRequest;
 import software.amazon.awssdk.services.s3.model.ListObjectsResponse;
 import software.amazon.awssdk.services.s3.model.ListObjectsV2Request;
@@ -88,6 +90,7 @@ import 
software.amazon.awssdk.services.s3.model.ListObjectsV2Response;
 import software.amazon.awssdk.services.s3.model.S3Exception;
 import software.amazon.awssdk.services.s3.model.S3Object;
 import software.amazon.awssdk.services.s3.model.S3Response;
+import software.amazon.awssdk.services.s3.paginators.ListObjectsV2Iterable;
 
 public class S3Utils {
     private S3Utils() {
@@ -519,4 +522,58 @@ public class S3Utils {
             }
         }
     }
+
+    /**
+     * Lists the files and folders located directly under {@code prefix} in the given container.
+     * <p>
+     * An S3 client is built from {@code configuration} for this call only and is closed before returning.
+     *
+     * @param configuration properties used to build the S3 client
+     * @param container     container name
+     * @param prefix        key prefix whose direct children are listed
+     * @return a map with the keys {@code "files"} and {@code "folders"}, each mapped to a list of names
+     *         relative to {@code prefix}
+     * @throws CompilationException propagated from {@code buildAwsS3Client}
+     * @throws HyracksDataException propagated from the listing
+     */
+    public static Map<String, List<String>> S3ObjectsOfSingleDepth(Map<String, String> configuration, String container,
+            String prefix) throws CompilationException, HyracksDataException {
+        // The listing is fully materialized into lists before it is returned, so the client can be released
+        // here; without closing it, every call would leak the client's underlying resources.
+        try (S3Client s3Client = buildAwsS3Client(configuration)) {
+            return listS3ObjectsOfSingleDepth(s3Client, container, prefix);
+        }
+    }
+
+    /**
+     * Retrieves the objects and common prefixes found exactly one level below {@code prefix}, using the
+     * ListObjectsV2 paginator with {@code "/"} as the delimiter.
+     *
+     * @param s3Client  S3 client
+     * @param container container name
+     * @param prefix    definition prefix; {@code null} is treated as the empty prefix when computing names
+     * @return a map with two entries: {@code "files"} holds the object keys with {@code prefix} stripped
+     *         (the object whose key equals {@code prefix} is skipped), and {@code "folders"} holds the common
+     *         prefixes with {@code prefix} and one trailing {@code "/"} stripped
+     * @throws HyracksDataException declared for callers; not raised directly by this method
+     */
+    private static Map<String, List<String>> listS3ObjectsOfSingleDepth(S3Client s3Client, String container,
+            String prefix) throws HyracksDataException {
+        // normalize a null prefix so the offset arithmetic below cannot fail with a NullPointerException
+        String basePrefix = prefix == null ? "" : prefix;
+        int baseLength = basePrefix.length();
+        List<String> files = new ArrayList<>();
+        List<String> folders = new ArrayList<>();
+
+        ListObjectsV2Request request =
+                ListObjectsV2Request.builder().bucket(container).prefix(prefix).delimiter("/").build();
+        ListObjectsV2Iterable pages = s3Client.listObjectsV2Paginator(request);
+        for (ListObjectsV2Response page : pages) {
+            // put all the files
+            for (S3Object object : page.contents()) {
+                String key = object.key();
+                // the prefix itself may come back as an object; it is not a child, so skip it
+                if (!basePrefix.equals(key)) {
+                    files.add(key.substring(baseLength));
+                }
+            }
+            // put all the folders
+            for (CommonPrefix commonPrefix : page.commonPrefixes()) {
+                String folderName = commonPrefix.prefix().substring(baseLength);
+                folders.add(folderName.endsWith("/") ? folderName.substring(0, folderName.length() - 1) : folderName);
+            }
+        }
+
+        Map<String, List<String>> allObjects = new HashMap<>();
+        allObjects.put("files", files);
+        allObjects.put("folders", folders);
+        return allObjects;
+    }
 }

Reply via email to