This is an automated email from the ASF dual-hosted git repository. mblow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit df17aaabf707147a0399313fca13b533b94e9844 Author: Hussain Towaileb <[email protected]> AuthorDate: Tue Apr 28 15:08:22 2020 +0300 [ASTERIXDB-2719][EXT] Default external datasets to read .gz and .gzip files as well - user model changes: no - storage format changes: no - interface changes: no Details: - Added support to properly stream and read .gz files. - Added test cases for .json.gz files, and a mix of .json and .gz files. Change-Id: Ic16044966400954d0cb7c36b99839ad91267ff84 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/6043 Integration-Tests: Jenkins <[email protected]> Tested-by: Jenkins <[email protected]> Reviewed-by: Hussain Towaileb <[email protected]> Reviewed-by: Dmitry Lychagin <[email protected]> --- .../aws/AwsS3ExternalDatasetTest.java | 155 +++++++++++++++------ .../{000 => gz}/external_dataset.000.ddl.sqlpp | 8 +- .../{000 => gz}/external_dataset.001.query.sqlpp | 0 .../{000 => gz}/external_dataset.002.query.sqlpp | 0 .../{000 => gz}/external_dataset.003.query.sqlpp | 0 .../{000 => gz}/external_dataset.004.query.sqlpp | 0 .../{000 => gz}/external_dataset.005.query.sqlpp | 0 .../{000 => gz}/external_dataset.006.query.sqlpp | 0 .../{000 => gz}/external_dataset.007.ddl.sqlpp | 0 .../{000 => json}/external_dataset.000.ddl.sqlpp | 8 +- .../{000 => json}/external_dataset.001.query.sqlpp | 0 .../{000 => json}/external_dataset.002.query.sqlpp | 0 .../{000 => json}/external_dataset.003.query.sqlpp | 0 .../{000 => json}/external_dataset.004.query.sqlpp | 0 .../{000 => json}/external_dataset.005.query.sqlpp | 0 .../{000 => json}/external_dataset.006.query.sqlpp | 0 .../{000 => json}/external_dataset.007.ddl.sqlpp | 0 .../{000 => mixed}/external_dataset.000.ddl.sqlpp | 8 +- .../external_dataset.001.query.sqlpp | 0 .../external_dataset.002.query.sqlpp | 0 .../external_dataset.003.query.sqlpp | 0 .../external_dataset.004.query.sqlpp | 0 .../external_dataset.005.query.sqlpp | 0 .../external_dataset.006.query.sqlpp | 0 .../{000 => mixed}/external_dataset.007.ddl.sqlpp | 0 .../aws/s3/json/000/external_dataset.004.adm | 1 - .../aws/s3/json/000/external_dataset.005.adm | 10 -- .../aws/s3/json/000/external_dataset.006.adm | 1 - .../aws/s3/json/000/external_dataset.007.adm | 10 -- .../external_dataset.001.adm} | 0 .../s3/json/{000 => gz}/external_dataset.002.adm | 0 .../aws/s3/json/gz/external_dataset.003.adm | 1 + .../aws/s3/json/gz/external_dataset.004.adm | 25 ++++ .../aws/s3/json/gz/external_dataset.005.adm | 1 + .../aws/s3/json/gz/external_dataset.006.adm | 25 ++++ .../external_dataset.001.adm} | 0 .../s3/json/{000 => json}/external_dataset.002.adm | 0 .../aws/s3/json/json/external_dataset.003.adm | 1 + .../aws/s3/json/json/external_dataset.004.adm | 25 ++++ .../aws/s3/json/json/external_dataset.005.adm | 1 + .../aws/s3/json/json/external_dataset.006.adm | 25 ++++ .../aws/s3/json/mixed/external_dataset.001.adm | 1 + .../aws/s3/json/mixed/external_dataset.002.adm | 1 + .../aws/s3/json/mixed/external_dataset.003.adm | 1 + .../aws/s3/json/mixed/external_dataset.004.adm | 50 +++++++ .../aws/s3/json/mixed/external_dataset.005.adm | 1 + .../aws/s3/json/mixed/external_dataset.006.adm | 50 +++++++ .../runtimets/testsuite_external_dataset.xml | 14 +- .../input/record/reader/aws/AwsS3InputStream.java | 15 +- .../record/reader/aws/AwsS3InputStreamFactory.java | 19 ++- 50 files changed, 373 insertions(+), 84 deletions(-) diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java index 6ff59ee..c76a7ca 100644 --- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java +++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java @@ -20,10 +20,12 @@ package org.apache.asterix.test.external_dataset.aws; import static org.apache.hyracks.util.file.FileUtil.joinPath; +import java.io.ByteArrayOutputStream; import java.io.File; import java.net.InetAddress; import java.net.InetSocketAddress; import java.net.URI; +import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.util.Collection; @@ -32,6 +34,7 @@ import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.zip.GZIPOutputStream; import org.apache.asterix.common.api.INcApplicationContext; import org.apache.asterix.test.common.TestExecutor; @@ -46,6 +49,7 @@ import org.apache.hyracks.control.nc.NodeControllerService; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.junit.AfterClass; +import org.junit.Assert; import org.junit.BeforeClass; import org.junit.FixMethodOrder; import org.junit.Test; @@ -85,10 +89,6 @@ public class AwsS3ExternalDatasetTest { private static final String CSV_DATA_PATH = joinPath("data", "csv"); private static final String TSV_DATA_PATH = joinPath("data", "tsv"); - // IMPORTANT: The following values must be used in the AWS S3 test case - private static S3Mock s3MockServer; - private static S3Client client; - // Service endpoint private static final int S3_MOCK_SERVER_PORT = 8001; private static final String S3_MOCK_SERVER_HOSTNAME = "http://localhost:" + S3_MOCK_SERVER_PORT; @@ -105,6 +105,11 @@ public class AwsS3ExternalDatasetTest { private static final DeleteBucketRequest.Builder DELETE_BUCKET_BUILDER = DeleteBucketRequest.builder(); private static final PutObjectRequest.Builder PUT_OBJECT_BUILDER = PutObjectRequest.builder(); + // IMPORTANT: The following values must be used in the AWS S3 test case + private static S3Mock s3MockServer; + private static S3Client client; + private static PutObjectRequest.Builder builder = PutObjectRequest.builder().bucket(S3_MOCK_SERVER_BUCKET); + protected TestCaseContext tcCtx; public AwsS3ExternalDatasetTest(TestCaseContext tcCtx) { @@ -192,55 +197,117 @@ public class AwsS3ExternalDatasetTest { client.createBucket(CreateBucketRequest.builder().bucket(S3_MOCK_SERVER_BUCKET).build()); LOGGER.info("bucket created successfully"); - // Load JSON files + LOGGER.info("Adding JSON files to the bucket"); loadJsonFiles(); + LOGGER.info("JSON Files added successfully"); + + LOGGER.info("Adding CSV files to the bucket"); loadCsvFiles(); + LOGGER.info("CSV Files added successfully"); + + LOGGER.info("Adding TSV files to the bucket"); loadTsvFiles(); + LOGGER.info("TSV Files added successfully"); LOGGER.info("Files added successfully"); } private static void loadJsonFiles() { - LOGGER.info("Adding JSON files to the bucket"); + String dataBasePath = JSON_DATA_PATH; + String definition = S3_MOCK_SERVER_BUCKET_JSON_DEFINITION; + + // Json data + String definitionSegment = "json"; + loadData(dataBasePath, "single-line", "20-records.json", definition, definitionSegment, false); + loadData(dataBasePath, "multi-lines", "20-records.json", definition, definitionSegment, false); + loadData(dataBasePath, "multi-lines-with-arrays", "5-records.json", definition, definitionSegment, false); + loadData(dataBasePath, "multi-lines-with-nested-objects", "5-records.json", definition, definitionSegment, + false); + + // Json gz compressed data + definitionSegment = "gz"; + loadGzData(dataBasePath, "single-line", "20-records.json", definition, definitionSegment, false); + loadGzData(dataBasePath, "multi-lines", "20-records.json", definition, definitionSegment, false); + loadGzData(dataBasePath, "multi-lines-with-arrays", "5-records.json", definition, definitionSegment, false); + loadGzData(dataBasePath, "multi-lines-with-nested-objects", "5-records.json", definition, definitionSegment, + false); + + // Mixed json and json gz compressed data + definitionSegment = "mixed"; + loadData(dataBasePath, "single-line", "20-records.json", definition, definitionSegment, false); + loadData(dataBasePath, "multi-lines", "20-records.json", definition, definitionSegment, false); + loadData(dataBasePath, "multi-lines-with-arrays", "5-records.json", definition, definitionSegment, false); + loadData(dataBasePath, "multi-lines-with-nested-objects", "5-records.json", definition, definitionSegment, + false); + loadGzData(dataBasePath, "single-line", "20-records.json", definition, definitionSegment, false); + loadGzData(dataBasePath, "multi-lines", "20-records.json", definition, definitionSegment, false); + loadGzData(dataBasePath, "multi-lines-with-arrays", "5-records.json", definition, definitionSegment, false); + loadGzData(dataBasePath, "multi-lines-with-nested-objects", "5-records.json", definition, definitionSegment, + false); + } + + private static void loadData(String fileBasePath, String filePathSegment, String filename, String definition, + String definitionSegment, boolean removeExtension) { + // Files data + Path filePath = Paths.get(fileBasePath, filePathSegment, filename); + RequestBody requestBody = RequestBody.fromFile(filePath); + + // Keep or remove the file extension + Assert.assertFalse("Files with no extension are not supported yet for external datasets", removeExtension); + String finalFileName; + if (removeExtension) { + finalFileName = FilenameUtils.removeExtension(filename); + } else { + finalFileName = filename; + } - // Set the bucket - PutObjectRequest.Builder builder1 = PutObjectRequest.builder().bucket(S3_MOCK_SERVER_BUCKET); - - // load multi-level single line JSON files - String singleLineBasePath = S3_MOCK_SERVER_BUCKET_JSON_DEFINITION + "single-line/"; - Path filePath1 = Paths.get(JSON_DATA_PATH, "single-line", "20-records.json"); - RequestBody reqBody1 = RequestBody.fromFile(filePath1); - client.putObject(builder1.key(singleLineBasePath + "20-records.json").build(), reqBody1); - client.putObject(builder1.key(singleLineBasePath + "level1a/" + "20-records.json").build(), reqBody1); - client.putObject(builder1.key(singleLineBasePath + "level1b/" + "20-records.json").build(), reqBody1); - client.putObject(builder1.key(singleLineBasePath + "level1a/level2a/" + "20-records.json").build(), reqBody1); - client.putObject(builder1.key(singleLineBasePath + "level1a/level2b/" + "20-records.json").build(), reqBody1); - - // Load multi-level multi-lines JSON files - String multiLinesBasePath = S3_MOCK_SERVER_BUCKET_JSON_DEFINITION + "multi-lines/"; - Path filePath2 = Paths.get(JSON_DATA_PATH, "multi-lines", "20-records.json"); - RequestBody reqBody2 = RequestBody.fromFile(filePath2); - client.putObject(builder1.key(multiLinesBasePath + "20-records.json").build(), reqBody2); - client.putObject(builder1.key(multiLinesBasePath + "level1a/" + "20-records.json").build(), reqBody2); - client.putObject(builder1.key(multiLinesBasePath + "level1b/" + "20-records.json").build(), reqBody2); - client.putObject(builder1.key(multiLinesBasePath + "level1a/level2a/" + "20-records.json").build(), reqBody2); - client.putObject(builder1.key(multiLinesBasePath + "level1a/level2b/" + "20-records.json").build(), reqBody2); - - // Load multi-level multi-lines with array JSON files - String multiLinesWithArraysBasePath = S3_MOCK_SERVER_BUCKET_JSON_DEFINITION + "multi-lines-with-arrays/"; - Path filePath3 = Paths.get(JSON_DATA_PATH, "multi-lines-with-arrays", "5-records.json"); - RequestBody reqBody3 = RequestBody.fromFile(filePath3); - client.putObject(builder1.key(multiLinesWithArraysBasePath + "5-records.json").build(), reqBody3); - client.putObject(builder1.key(multiLinesWithArraysBasePath + "level1a/" + "5-records.json").build(), reqBody3); - - // Load multi-level multi-lines with nested objects JSON files - String multiLinesWithNestedObjectsBasePath = - S3_MOCK_SERVER_BUCKET_JSON_DEFINITION + "multi-lines-with-nested-objects/"; - Path filePath4 = Paths.get(JSON_DATA_PATH, "multi-lines-with-nested-objects", "5-records.json"); - RequestBody reqBody4 = RequestBody.fromFile(filePath4); - client.putObject(builder1.key(multiLinesWithNestedObjectsBasePath + "5-records.json").build(), reqBody4); - client.putObject(builder1.key(multiLinesWithNestedObjectsBasePath + "level1a/" + "5-records.json").build(), - reqBody4); + // Files base definition + String basePath = definition + filePathSegment + "/" + definitionSegment + "/"; + + // Load the data + client.putObject(builder.key(basePath + finalFileName).build(), requestBody); + client.putObject(builder.key(basePath + "level1a/" + finalFileName).build(), requestBody); + client.putObject(builder.key(basePath + "level1b/" + finalFileName).build(), requestBody); + client.putObject(builder.key(basePath + "level1a/level2a/" + finalFileName).build(), requestBody); + client.putObject(builder.key(basePath + "level1a/level2b/" + finalFileName).build(), requestBody); + } + + private static void loadGzData(String fileBasePath, String filePathSegment, String filename, String definition, + String definitionSegment, boolean removeExtension) { + try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + GZIPOutputStream gzipOutputStream = new GZIPOutputStream(byteArrayOutputStream)) { + + // Files data + Path filePath = Paths.get(fileBasePath, filePathSegment, filename); + + // Get the compressed data + gzipOutputStream.write(Files.readAllBytes(filePath)); + gzipOutputStream.close(); // Need to close or data will be invalid + byte[] gzipBytes = byteArrayOutputStream.toByteArray(); + RequestBody requestBody = RequestBody.fromBytes(gzipBytes); + + // Keep or remove the file extension + Assert.assertFalse("Files with no extension are not supported yet for external datasets", removeExtension); + String finalFileName; + if (removeExtension) { + finalFileName = FilenameUtils.removeExtension(filename); + } else { + finalFileName = filename; + } + finalFileName += ".gz"; + + // Files base definition + String basePath = definition + filePathSegment + "/" + definitionSegment + "/"; + + // Load the data + client.putObject(builder.key(basePath + finalFileName).build(), requestBody); + client.putObject(builder.key(basePath + "level1a/" + finalFileName).build(), requestBody); + client.putObject(builder.key(basePath + "level1b/" + finalFileName).build(), requestBody); + client.putObject(builder.key(basePath + "level1a/level2a/" + finalFileName).build(), requestBody); + client.putObject(builder.key(basePath + "level1a/level2b/" + finalFileName).build(), requestBody); + } catch (Exception ex) { + LOGGER.error(ex.getMessage()); + } } private static void loadCsvFiles() { diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/gz/external_dataset.000.ddl.sqlpp similarity index 92% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.000.ddl.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/gz/external_dataset.000.ddl.sqlpp index 8d084a1..b5b2c48 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.000.ddl.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/gz/external_dataset.000.ddl.sqlpp @@ -32,7 +32,7 @@ create external dataset test1(test) using S3 ( ("region"="us-west-2"), ("serviceEndpoint"="http://localhost:8001"), ("container"="playground"), -("definition"="json-data/reviews/single-line"), +("definition"="json-data/reviews/single-line/gz"), ("format"="json") ); @@ -43,7 +43,7 @@ create external dataset test2(test) using S3 ( ("region"="us-west-2"), ("serviceEndpoint"="http://localhost:8001"), ("container"="playground"), -("definition"="json-data/reviews/multi-lines"), +("definition"="json-data/reviews/multi-lines/gz"), ("format"="json") ); @@ -54,7 +54,7 @@ create external dataset test3(test) using S3 ( ("region"="us-west-2"), ("serviceEndpoint"="http://localhost:8001"), ("container"="playground"), -("definition"="json-data/reviews/multi-lines-with-arrays"), +("definition"="json-data/reviews/multi-lines-with-arrays/gz"), ("format"="json") ); @@ -65,6 +65,6 @@ create external dataset test4(test) using S3 ( ("region"="us-west-2"), ("serviceEndpoint"="http://localhost:8001"), ("container"="playground"), -("definition"="json-data/reviews/multi-lines-with-nested-objects"), +("definition"="json-data/reviews/multi-lines-with-nested-objects/gz"), ("format"="json") ); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.001.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/gz/external_dataset.001.query.sqlpp similarity index 100% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.001.query.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/gz/external_dataset.001.query.sqlpp diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.002.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/gz/external_dataset.002.query.sqlpp similarity index 100% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.002.query.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/gz/external_dataset.002.query.sqlpp diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.003.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/gz/external_dataset.003.query.sqlpp similarity index 100% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.003.query.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/gz/external_dataset.003.query.sqlpp diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.004.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/gz/external_dataset.004.query.sqlpp similarity index 100% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.004.query.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/gz/external_dataset.004.query.sqlpp diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.005.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/gz/external_dataset.005.query.sqlpp similarity index 100% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.005.query.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/gz/external_dataset.005.query.sqlpp diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.006.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/gz/external_dataset.006.query.sqlpp similarity index 100% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.006.query.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/gz/external_dataset.006.query.sqlpp diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.007.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/gz/external_dataset.007.ddl.sqlpp similarity index 100% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.007.ddl.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/gz/external_dataset.007.ddl.sqlpp diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.000.ddl.sqlpp similarity index 91% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.000.ddl.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.000.ddl.sqlpp index 8d084a1..b6d875b 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.000.ddl.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.000.ddl.sqlpp @@ -32,7 +32,7 @@ create external dataset test1(test) using S3 ( ("region"="us-west-2"), ("serviceEndpoint"="http://localhost:8001"), ("container"="playground"), -("definition"="json-data/reviews/single-line"), +("definition"="json-data/reviews/single-line/json"), ("format"="json") ); @@ -43,7 +43,7 @@ create external dataset test2(test) using S3 ( ("region"="us-west-2"), ("serviceEndpoint"="http://localhost:8001"), ("container"="playground"), -("definition"="json-data/reviews/multi-lines"), +("definition"="json-data/reviews/multi-lines/json"), ("format"="json") ); @@ -54,7 +54,7 @@ create external dataset test3(test) using S3 ( ("region"="us-west-2"), ("serviceEndpoint"="http://localhost:8001"), ("container"="playground"), -("definition"="json-data/reviews/multi-lines-with-arrays"), +("definition"="json-data/reviews/multi-lines-with-arrays/json"), ("format"="json") ); @@ -65,6 +65,6 @@ create external dataset test4(test) using S3 ( ("region"="us-west-2"), ("serviceEndpoint"="http://localhost:8001"), ("container"="playground"), -("definition"="json-data/reviews/multi-lines-with-nested-objects"), +("definition"="json-data/reviews/multi-lines-with-nested-objects/json"), ("format"="json") ); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.001.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.001.query.sqlpp similarity index 100% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.001.query.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.001.query.sqlpp diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.002.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.002.query.sqlpp similarity index 100% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.002.query.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.002.query.sqlpp diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.003.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.003.query.sqlpp similarity index 100% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.003.query.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.003.query.sqlpp diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.004.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.004.query.sqlpp similarity index 100% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.004.query.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.004.query.sqlpp diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.005.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.005.query.sqlpp similarity index 100% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.005.query.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.005.query.sqlpp diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.006.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.006.query.sqlpp similarity index 100% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.006.query.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.006.query.sqlpp diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.007.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.007.ddl.sqlpp similarity index 100% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.007.ddl.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.007.ddl.sqlpp diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/mixed/external_dataset.000.ddl.sqlpp similarity index 91% rename from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.000.ddl.sqlpp rename to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/mixed/external_dataset.000.ddl.sqlpp index 8d084a1..ca492ea 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.000.ddl.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/mixed/external_dataset.000.ddl.sqlpp @@ -32,7 +32,7 @@ create external dataset test1(test) using S3 ( ("region"="us-west-2"), ("serviceEndpoint"="http://localhost:8001"), ("container"="playground"), -("definition"="json-data/reviews/single-line"), +("definition"="json-data/reviews/single-line/mixed"), ("format"="json") ); @@ -43,7 +43,7 @@ create external dataset test2(test) using S3 ( ("region"="us-west-2"), ("serviceEndpoint"="http://localhost:8001"), ("container"="playground"), -("definition"="json-data/reviews/multi-lines"), +("definition"="json-data/reviews/multi-lines/mixed"), ("format"="json") ); @@ -54,7 +54,7 @@ create external dataset test3(test) using S3 ( ("region"="us-west-2"), ("serviceEndpoint"="http://localhost:8001"), ("container"="playground"), -("definition"="json-data/reviews/multi-lines-with-arrays"), +("definition"="json-data/reviews/multi-lines-with-arrays/mixed"), ("format"="json") ); @@ -65,6 +65,6 @@ create external dataset test4(test) using S3 ( ("region"="us-west-2"), ("serviceEndpoint"="http://localhost:8001"), ("container"="playground"), -("definition"="json-data/reviews/multi-lines-with-nested-objects"), +("definition"="json-data/reviews/multi-lines-with-nested-objects/mixed"), ("format"="json") ); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.001.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/mixed/external_dataset.001.query.sqlpp similarity index 100% rename from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.001.query.sqlpp rename to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/mixed/external_dataset.001.query.sqlpp diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.002.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/mixed/external_dataset.002.query.sqlpp similarity index 100% rename from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.002.query.sqlpp rename to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/mixed/external_dataset.002.query.sqlpp diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.003.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/mixed/external_dataset.003.query.sqlpp similarity index 100% rename from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.003.query.sqlpp rename to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/mixed/external_dataset.003.query.sqlpp diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.004.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/mixed/external_dataset.004.query.sqlpp similarity index 100% rename from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.004.query.sqlpp rename to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/mixed/external_dataset.004.query.sqlpp diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.005.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/mixed/external_dataset.005.query.sqlpp similarity index 100% rename from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.005.query.sqlpp rename to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/mixed/external_dataset.005.query.sqlpp diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.006.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/mixed/external_dataset.006.query.sqlpp similarity index 100% rename from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.006.query.sqlpp rename to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/mixed/external_dataset.006.query.sqlpp diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.007.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/mixed/external_dataset.007.ddl.sqlpp similarity index 100% rename from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/000/external_dataset.007.ddl.sqlpp rename to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/mixed/external_dataset.007.ddl.sqlpp diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/000/external_dataset.004.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/000/external_dataset.004.adm deleted file mode 100644 index d10a16c..0000000 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/000/external_dataset.004.adm +++ /dev/null @@ -1 +0,0 @@ -{ "count": 10 } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/000/external_dataset.005.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/000/external_dataset.005.adm deleted file mode 100644 index a1881c3..0000000 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/000/external_dataset.005.adm +++ /dev/null @@ -1,10 +0,0 @@ -{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } -{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } -{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] } -{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] } -{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } -{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } -{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] } -{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] } -{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] } -{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/000/external_dataset.006.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/000/external_dataset.006.adm deleted file mode 100644 index d10a16c..0000000 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/000/external_dataset.006.adm +++ /dev/null @@ -1 +0,0 @@ -{ "count": 10 } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/000/external_dataset.007.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/000/external_dataset.007.adm deleted file mode 100644 index d24a08b..0000000 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/000/external_dataset.007.adm +++ /dev/null @@ -1,10 +0,0 @@ -{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } } -{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } } -{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] } -{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] } -{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] } -{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] } -{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] } -{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] } -{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] } -{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/000/external_dataset.003.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/gz/external_dataset.001.adm similarity index 100% copy from asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/000/external_dataset.003.adm copy to asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/gz/external_dataset.001.adm diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/000/external_dataset.002.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/gz/external_dataset.002.adm similarity index 100% copy from asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/000/external_dataset.002.adm copy to asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/gz/external_dataset.002.adm diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/gz/external_dataset.003.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/gz/external_dataset.003.adm new file mode 100644 index 0000000..5db606c --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/gz/external_dataset.003.adm @@ -0,0 +1 @@ +{ "count": 25 } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/gz/external_dataset.004.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/gz/external_dataset.004.adm new file mode 100644 index 0000000..7660e7e --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/gz/external_dataset.004.adm @@ -0,0 +1,25 @@ +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/gz/external_dataset.005.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/gz/external_dataset.005.adm new file mode 100644 index 0000000..5db606c --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/gz/external_dataset.005.adm @@ -0,0 +1 @@ +{ "count": 25 } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/gz/external_dataset.006.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/gz/external_dataset.006.adm new file mode 100644 index 0000000..7643986 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/gz/external_dataset.006.adm @@ -0,0 +1,25 @@ +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/000/external_dataset.003.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/json/external_dataset.001.adm similarity index 100% rename from asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/000/external_dataset.003.adm rename to asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/json/external_dataset.001.adm diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/000/external_dataset.002.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/json/external_dataset.002.adm similarity index 100% rename from asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/000/external_dataset.002.adm rename to asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/json/external_dataset.002.adm diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/json/external_dataset.003.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/json/external_dataset.003.adm new file mode 100644 index 0000000..5db606c --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/json/external_dataset.003.adm @@ -0,0 +1 @@ +{ "count": 25 } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/json/external_dataset.004.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/json/external_dataset.004.adm new file mode 100644 index 0000000..7660e7e --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/json/external_dataset.004.adm @@ -0,0 +1,25 @@ +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/json/external_dataset.005.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/json/external_dataset.005.adm new file mode 100644 index 0000000..5db606c --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/json/external_dataset.005.adm @@ -0,0 +1 @@ +{ "count": 25 } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/json/external_dataset.006.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/json/external_dataset.006.adm new file mode 100644 index 0000000..7643986 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/json/external_dataset.006.adm @@ -0,0 +1,25 @@ +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/mixed/external_dataset.001.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/mixed/external_dataset.001.adm new file mode 100644 index 0000000..8e8fe53 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/mixed/external_dataset.001.adm @@ -0,0 +1 @@ +{ "count": 200 } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/mixed/external_dataset.002.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/mixed/external_dataset.002.adm new file mode 100644 index 0000000..8e8fe53 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/mixed/external_dataset.002.adm @@ -0,0 +1 @@ +{ "count": 200 } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/mixed/external_dataset.003.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/mixed/external_dataset.003.adm new file mode 100644 index 0000000..6540472 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/mixed/external_dataset.003.adm @@ -0,0 +1 @@ +{ "count": 50 } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/mixed/external_dataset.004.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/mixed/external_dataset.004.adm new file mode 100644 index 0000000..9a52dea --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/mixed/external_dataset.004.adm @@ -0,0 +1,50 @@ +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/mixed/external_dataset.005.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/mixed/external_dataset.005.adm new file mode 100644 index 0000000..6540472 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/mixed/external_dataset.005.adm @@ -0,0 +1 @@ +{ "count": 50 } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/mixed/external_dataset.006.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/mixed/external_dataset.006.adm new file mode 100644 index 0000000..722dfe1 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/mixed/external_dataset.006.adm @@ -0,0 +1,50 @@ +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml index a6774fa..007f194 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml @@ -20,8 +20,18 @@ <test-suite xmlns="urn:xml.testframework.asterix.apache.org" ResultOffsetPath="results" QueryOffsetPath="queries_sqlpp" QueryFileExtension=".sqlpp"> <test-group name="external-dataset"> <test-case FilePath="external-dataset"> - <compilation-unit name="aws/s3/json/000"> - <output-dir compare="Text">aws/s3/json/000</output-dir> + <compilation-unit name="aws/s3/json/json"> + <output-dir compare="Text">aws/s3/json/json</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="external-dataset"> + <compilation-unit name="aws/s3/json/gz"> + <output-dir compare="Text">aws/s3/json/gz</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="external-dataset"> + <compilation-unit name="aws/s3/json/mixed"> + <output-dir compare="Text">aws/s3/json/mixed</output-dir> </compilation-unit> </test-case> <test-case FilePath="external-dataset"> diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStream.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStream.java index 78b0797..315327f 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStream.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStream.java @@ -24,8 +24,10 @@ import java.io.IOException; import java.net.URI; import java.util.List; import java.util.Map; +import java.util.zip.GZIPInputStream; import org.apache.asterix.external.input.stream.AbstractMultipleInputStream; +import org.apache.asterix.external.util.ExternalDataConstants; import org.apache.hyracks.api.util.CleanupUtils; import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; @@ -62,20 +64,27 @@ public class AwsS3InputStream extends AbstractMultipleInputStream { // Finished reading all the files if (nextFileIndex >= filePaths.size()) { if (in != null) { - in.close(); + CleanupUtils.close(in, null); } return false; } // Close the current stream before going to the next one if (in != null) { - in.close(); + CleanupUtils.close(in, null); } String bucket = configuration.get(AwsS3Constants.CONTAINER_NAME_FIELD_NAME); GetObjectRequest.Builder getObjectBuilder = GetObjectRequest.builder(); GetObjectRequest getObjectRequest = getObjectBuilder.bucket(bucket).key(filePaths.get(nextFileIndex)).build(); - in = s3Client.getObject(getObjectRequest); + + // Use the proper input stream + String filename = filePaths.get(nextFileIndex).toLowerCase(); + if (filename.endsWith(".gz") || filename.endsWith(".gzip")) { + in = new GZIPInputStream(s3Client.getObject(getObjectRequest), ExternalDataConstants.DEFAULT_BUFFER_SIZE); + } else { + in = s3Client.getObject(getObjectRequest); + } if (notificationHandler != null) { notificationHandler.notifyNewSource(); } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java index 6b8bb59..451a783 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java @@ -123,12 +123,29 @@ public class AwsS3InputStreamFactory implements IInputStreamFactory { throw AsterixException.create(ErrorCode.PROVIDER_STREAM_RECORD_READER_UNKNOWN_FORMAT, fileFormat); } - s3Objects.stream().filter(object -> object.key().endsWith(fileExtension)).forEach(filesOnly::add); + // TODO(Hussain): We will have a property that can disable checking for .gz here + s3Objects.stream().filter(object -> isValidFile(object.key(), fileFormat)).forEach(filesOnly::add); return filesOnly; } /** + * Checks if the file name is of the provided format, or in the provided format in a compressed (.gz or .gzip) state + * + * @param fileName file name to be checked + * @param format expected format + * @return {@code true} if the file name is of the expected format, {@code false} otherwise + */ + private boolean isValidFile(String fileName, String format) { + String lowCaseName = fileName.toLowerCase(); + String lowCaseFormat = format.toLowerCase(); + String gzExt = lowCaseFormat + ".gz"; + String gzipExt = lowCaseFormat + ".gzip"; + + return lowCaseName.endsWith(lowCaseFormat) || lowCaseName.endsWith(gzExt) || lowCaseName.endsWith(gzipExt); + } + + /** * To efficiently utilize the parallelism, work load will be distributed amongst the partitions based on the file * size. *
