This is an automated email from the ASF dual-hosted git repository.

htowaileb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git


The following commit(s) were added to refs/heads/master by this push:
     new fc5b4f14c4 [NO ISSUE][EXT]: Correctly point to JSON files to convert to Parquet
fc5b4f14c4 is described below

commit fc5b4f14c474c531c9c8351e1aeb9db3cea24999
Author: Hussain Towaileb <[email protected]>
AuthorDate: Wed Sep 6 22:14:58 2023 +0300

    [NO ISSUE][EXT]: Correctly point to JSON files to convert to Parquet
    
    Change-Id: Ia6d4531ffe202c4eeaff10652018c05952a564c3
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17763
    Integration-Tests: Jenkins <[email protected]>
    Tested-by: Jenkins <[email protected]>
    Reviewed-by: Hussain Towaileb <[email protected]>
    Reviewed-by: Wail Alkowaileet <[email protected]>
---
 .../test/external_dataset/ExternalDatasetTestUtils.java   | 15 +++++++++++----
 .../external_dataset/aws/AwsS3ExternalDatasetTest.java    |  5 +++++
 .../microsoft/AzureBlobStorageExternalDatasetTest.java    |  2 ++
 .../external_dataset/parquet/BinaryFileConverterUtil.java |  6 +++---
 4 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
index c936fd2e10..f61ccbeb30 100644
--- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
@@ -96,12 +96,19 @@ public class ExternalDatasetTestUtils {
         BinaryFileConverterUtil.cleanBinaryDirectory(basePath, 
BINARY_GEN_BASEDIR);
         //Convert files in DEFAULT_PARQUET_SRC_PATH to parquet
         BinaryFileConverterUtil.convertToParquet(basePath, parquetRawJsonDir, 
BINARY_GEN_BASEDIR);
+    }
+
+    /**
+     * Generate binary files (e.g., parquet files)
+     */
+    public static void createBinaryFilesRecursively(String 
dataToConvertDirPath) throws IOException {
+        //base path
+        File basePath = new File(".");
 
         // convert certain files related to dynamic prefixes
-        String dataPath = "data/json/external-filter";
-        int startIndex = "data/json/".length(); // length to substring out of 
the final file name
-        BinaryFileConverterUtil.convertToParquetRecursively(basePath, 
dataPath, BINARY_GEN_BASEDIR, JSON_FILTER,
-                startIndex);
+        int startIndex = dataToConvertDirPath.indexOf("/external-filter");
+        BinaryFileConverterUtil.convertToParquetRecursively(basePath, 
dataToConvertDirPath, BINARY_GEN_BASEDIR,
+                JSON_FILTER, startIndex);
     }
 
     public static void setDataPaths(String jsonDataPath, String csvDataPath, 
String tsvDataPath) {
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
index 0121b588ea..246ea135d4 100644
--- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
@@ -19,6 +19,7 @@
 package org.apache.asterix.test.external_dataset.aws;
 
 import static 
org.apache.asterix.test.external_dataset.ExternalDatasetTestUtils.createBinaryFiles;
+import static 
org.apache.asterix.test.external_dataset.ExternalDatasetTestUtils.createBinaryFilesRecursively;
 import static 
org.apache.asterix.test.external_dataset.ExternalDatasetTestUtils.setDataPaths;
 import static 
org.apache.asterix.test.external_dataset.ExternalDatasetTestUtils.setUploaders;
 import static 
org.apache.asterix.test.external_dataset.parquet.BinaryFileConverterUtil.DEFAULT_PARQUET_SRC_PATH;
@@ -106,6 +107,9 @@ public class AwsS3ExternalDatasetTest {
 
     private static final Logger LOGGER = LogManager.getLogger();
 
+    private static final String PATH_BASE = joinPath("data");
+    private static final String EXTERNAL_FILTER_DATA_PATH = 
joinPath(PATH_BASE, "json", "external-filter");
+
     // subclasses of this class MUST instantiate these variables before using 
them to avoid unexpected behavior
     static String SUITE_TESTS;
     static String ONLY_TESTS;
@@ -186,6 +190,7 @@ public class AwsS3ExternalDatasetTest {
         final TestExecutor testExecutor = new AwsTestExecutor();
         LangExecutionUtil.setUp(TEST_CONFIG_FILE_NAME, testExecutor);
         createBinaryFiles(DEFAULT_PARQUET_SRC_PATH);
+        createBinaryFilesRecursively(EXTERNAL_FILTER_DATA_PATH);
         setNcEndpoints(testExecutor);
         startAwsS3MockServer();
     }
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/microsoft/AzureBlobStorageExternalDatasetTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/microsoft/AzureBlobStorageExternalDatasetTest.java
index 49f40acbc0..08f3816a49 100644
--- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/microsoft/AzureBlobStorageExternalDatasetTest.java
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/microsoft/AzureBlobStorageExternalDatasetTest.java
@@ -94,6 +94,7 @@ public class AzureBlobStorageExternalDatasetTest {
     private static final String CSV_DATA_PATH = joinPath("data", "csv");
     private static final String TSV_DATA_PATH = joinPath("data", "tsv");
     private static final String PARQUET_RAW_DATA_PATH = joinPath("data", 
"hdfs", "parquet");
+    public static final String EXTERNAL_FILTER_DATA_PATH = joinPath("data", 
"json", "external-filter");
 
     // Region, container and definitions
     private static final String PLAYGROUND_CONTAINER = "playground";
@@ -122,6 +123,7 @@ public class AzureBlobStorageExternalDatasetTest {
     public static void setUp() throws Exception {
         final TestExecutor testExecutor = new AzureTestExecutor();
         ExternalDatasetTestUtils.createBinaryFiles(PARQUET_RAW_DATA_PATH);
+        createBinaryFilesRecursively(EXTERNAL_FILTER_DATA_PATH);
         LangExecutionUtil.setUp(TEST_CONFIG_FILE_NAME, testExecutor);
         setNcEndpoints(testExecutor);
         createBlobServiceClient();
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/parquet/BinaryFileConverterUtil.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/parquet/BinaryFileConverterUtil.java
index 5cf4976d63..93ac24eedc 100644
--- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/parquet/BinaryFileConverterUtil.java
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/parquet/BinaryFileConverterUtil.java
@@ -71,11 +71,11 @@ public class BinaryFileConverterUtil {
         ParquetFileExampleGeneratorUtil.writeExample();
     }
 
-    public static void convertToParquetRecursively(File localDataRoot, String 
dirPath, String dest,
-            FilenameFilter filter, int startIndex) throws IOException {
+    public static void convertToParquetRecursively(File localDataRoot, String 
src, String dest, FilenameFilter filter,
+            int startIndex) throws IOException {
         File destPath = new File(localDataRoot, dest);
 
-        File dir = new File(dirPath);
+        File dir = new File(src);
         if (!dir.exists() || !dir.isDirectory()) {
             return;
         }

Reply via email to