This is an automated email from the ASF dual-hosted git repository.
htowaileb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git
The following commit(s) were added to refs/heads/master by this push:
new fc5b4f14c4 [NO ISSUE][EXT]: Correctly point to JSON files to convert to Parquet
fc5b4f14c4 is described below
commit fc5b4f14c474c531c9c8351e1aeb9db3cea24999
Author: Hussain Towaileb <[email protected]>
AuthorDate: Wed Sep 6 22:14:58 2023 +0300
[NO ISSUE][EXT]: Correctly point to JSON files to convert to Parquet
Change-Id: Ia6d4531ffe202c4eeaff10652018c05952a564c3
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17763
Integration-Tests: Jenkins <[email protected]>
Tested-by: Jenkins <[email protected]>
Reviewed-by: Hussain Towaileb <[email protected]>
Reviewed-by: Wail Alkowaileet <[email protected]>
---
.../test/external_dataset/ExternalDatasetTestUtils.java | 15 +++++++++++----
.../external_dataset/aws/AwsS3ExternalDatasetTest.java | 5 +++++
.../microsoft/AzureBlobStorageExternalDatasetTest.java | 2 ++
.../external_dataset/parquet/BinaryFileConverterUtil.java | 6 +++---
4 files changed, 21 insertions(+), 7 deletions(-)
diff --git
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
index c936fd2e10..f61ccbeb30 100644
---
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
+++
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
@@ -96,12 +96,19 @@ public class ExternalDatasetTestUtils {
BinaryFileConverterUtil.cleanBinaryDirectory(basePath,
BINARY_GEN_BASEDIR);
//Convert files in DEFAULT_PARQUET_SRC_PATH to parquet
BinaryFileConverterUtil.convertToParquet(basePath, parquetRawJsonDir,
BINARY_GEN_BASEDIR);
+ }
+
+ /**
+ * Generate binary files (e.g., parquet files)
+ */
+ public static void createBinaryFilesRecursively(String
dataToConvertDirPath) throws IOException {
+ //base path
+ File basePath = new File(".");
// convert certain files related to dynamic prefixes
- String dataPath = "data/json/external-filter";
- int startIndex = "data/json/".length(); // length to substring out of
the final file name
- BinaryFileConverterUtil.convertToParquetRecursively(basePath,
dataPath, BINARY_GEN_BASEDIR, JSON_FILTER,
- startIndex);
+ int startIndex = dataToConvertDirPath.indexOf("/external-filter");
+ BinaryFileConverterUtil.convertToParquetRecursively(basePath,
dataToConvertDirPath, BINARY_GEN_BASEDIR,
+ JSON_FILTER, startIndex);
}
public static void setDataPaths(String jsonDataPath, String csvDataPath,
String tsvDataPath) {
diff --git
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
index 0121b588ea..246ea135d4 100644
---
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
+++
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
@@ -19,6 +19,7 @@
package org.apache.asterix.test.external_dataset.aws;
import static
org.apache.asterix.test.external_dataset.ExternalDatasetTestUtils.createBinaryFiles;
+import static
org.apache.asterix.test.external_dataset.ExternalDatasetTestUtils.createBinaryFilesRecursively;
import static
org.apache.asterix.test.external_dataset.ExternalDatasetTestUtils.setDataPaths;
import static
org.apache.asterix.test.external_dataset.ExternalDatasetTestUtils.setUploaders;
import static
org.apache.asterix.test.external_dataset.parquet.BinaryFileConverterUtil.DEFAULT_PARQUET_SRC_PATH;
@@ -106,6 +107,9 @@ public class AwsS3ExternalDatasetTest {
private static final Logger LOGGER = LogManager.getLogger();
+ private static final String PATH_BASE = joinPath("data");
+ private static final String EXTERNAL_FILTER_DATA_PATH =
joinPath(PATH_BASE, "json", "external-filter");
+
// subclasses of this class MUST instantiate these variables before using
them to avoid unexpected behavior
static String SUITE_TESTS;
static String ONLY_TESTS;
@@ -186,6 +190,7 @@ public class AwsS3ExternalDatasetTest {
final TestExecutor testExecutor = new AwsTestExecutor();
LangExecutionUtil.setUp(TEST_CONFIG_FILE_NAME, testExecutor);
createBinaryFiles(DEFAULT_PARQUET_SRC_PATH);
+ createBinaryFilesRecursively(EXTERNAL_FILTER_DATA_PATH);
setNcEndpoints(testExecutor);
startAwsS3MockServer();
}
diff --git
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/microsoft/AzureBlobStorageExternalDatasetTest.java
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/microsoft/AzureBlobStorageExternalDatasetTest.java
index 49f40acbc0..08f3816a49 100644
---
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/microsoft/AzureBlobStorageExternalDatasetTest.java
+++
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/microsoft/AzureBlobStorageExternalDatasetTest.java
@@ -94,6 +94,7 @@ public class AzureBlobStorageExternalDatasetTest {
private static final String CSV_DATA_PATH = joinPath("data", "csv");
private static final String TSV_DATA_PATH = joinPath("data", "tsv");
private static final String PARQUET_RAW_DATA_PATH = joinPath("data",
"hdfs", "parquet");
+ public static final String EXTERNAL_FILTER_DATA_PATH = joinPath("data",
"json", "external-filter");
// Region, container and definitions
private static final String PLAYGROUND_CONTAINER = "playground";
@@ -122,6 +123,7 @@ public class AzureBlobStorageExternalDatasetTest {
public static void setUp() throws Exception {
final TestExecutor testExecutor = new AzureTestExecutor();
ExternalDatasetTestUtils.createBinaryFiles(PARQUET_RAW_DATA_PATH);
+ createBinaryFilesRecursively(EXTERNAL_FILTER_DATA_PATH);
LangExecutionUtil.setUp(TEST_CONFIG_FILE_NAME, testExecutor);
setNcEndpoints(testExecutor);
createBlobServiceClient();
diff --git
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/parquet/BinaryFileConverterUtil.java
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/parquet/BinaryFileConverterUtil.java
index 5cf4976d63..93ac24eedc 100644
---
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/parquet/BinaryFileConverterUtil.java
+++
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/parquet/BinaryFileConverterUtil.java
@@ -71,11 +71,11 @@ public class BinaryFileConverterUtil {
ParquetFileExampleGeneratorUtil.writeExample();
}
- public static void convertToParquetRecursively(File localDataRoot, String
dirPath, String dest,
- FilenameFilter filter, int startIndex) throws IOException {
+ public static void convertToParquetRecursively(File localDataRoot, String
src, String dest, FilenameFilter filter,
+ int startIndex) throws IOException {
File destPath = new File(localDataRoot, dest);
- File dir = new File(dirPath);
+ File dir = new File(src);
if (!dir.exists() || !dir.isDirectory()) {
return;
}