This is an automated email from the ASF dual-hosted git repository. mblow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit e692f4e06342766116467bc5e811e6817b0a7eb3 Author: Hussain Towaileb <[email protected]> AuthorDate: Wed May 6 13:07:39 2020 +0300 [ASTERIXDB-2724][EXT] Handle passing empty defintion to external datasets - user model changes: no - storage format changes: no - interface changes: no Details: - When an empty string is passed as an external dataset's definition, then no prefix is supplied to the AWS client (nothing to filter). - Added a test case for the above mentioned item. Change-Id: I500e7afb97aa076b690ef3b98ee83c8f5934a88f Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/6183 Integration-Tests: Jenkins <[email protected]> Tested-by: Jenkins <[email protected]> Reviewed-by: Hussain Towaileb <[email protected]> Reviewed-by: Dmitry Lychagin <[email protected]> Reviewed-by: Michael Blow <[email protected]> --- .../aws/AwsS3ExternalDatasetOnePartitionTest.java | 4 +++ .../aws/AwsS3ExternalDatasetTest.java | 24 ++++++++++++++ .../external_dataset.000.ddl.sqlpp | 37 ++++++++++++++++++++++ .../external_dataset.001.query.sqlpp | 23 ++++++++++++++ .../external_dataset.099.ddl.sqlpp | 20 ++++++++++++ .../external_dataset.001.adm | 1 + .../runtimets/testsuite_external_dataset.xml | 25 ++++++++------- .../record/reader/aws/AwsS3InputStreamFactory.java | 3 +- 8 files changed, 123 insertions(+), 14 deletions(-) diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetOnePartitionTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetOnePartitionTest.java index 88cd6b5..bd433f1 100644 --- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetOnePartitionTest.java +++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetOnePartitionTest.java @@ -44,9 +44,13 @@ public class AwsS3ExternalDatasetOnePartitionTest extends AwsS3ExternalDatasetTe ONLY_TESTS = "only_external_dataset.xml"; TEST_CONFIG_FILE_NAME = "src/test/resources/cc-single.conf"; PREPARE_S3_BUCKET = AwsS3ExternalDatasetOnePartitionTest::prepareS3Bucket; + PREPARE_FIXED_DATA_BUCKET = AwsS3ExternalDatasetOnePartitionTest::prepareFixedDataBucket; return LangExecutionUtil.tests(ONLY_TESTS, SUITE_TESTS); } private static void prepareS3Bucket() { } + + private static void prepareFixedDataBucket() { + } } diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java index b5866e9..37a3916 100644 --- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java +++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java @@ -83,6 +83,7 @@ public class AwsS3ExternalDatasetTest { static String ONLY_TESTS; static String TEST_CONFIG_FILE_NAME; static Runnable PREPARE_S3_BUCKET; + static Runnable PREPARE_FIXED_DATA_BUCKET; // Base directory paths for data files private static final String JSON_DATA_PATH = joinPath("data", "json"); @@ -96,6 +97,7 @@ public class AwsS3ExternalDatasetTest { // Region, bucket and definitions private static final String S3_MOCK_SERVER_REGION = "us-west-2"; private static final String S3_MOCK_SERVER_BUCKET = "playground"; + private static final String S3_MOCK_SERVER_FIXED_DATA_BUCKET = "fixed-data-bucket"; // Do not use, has fixed data private static final String S3_MOCK_SERVER_BUCKET_JSON_DEFINITION = "json-data/reviews/"; // data resides here private static final String S3_MOCK_SERVER_BUCKET_CSV_DEFINITION = "csv-data/reviews/"; // data resides here private static final String S3_MOCK_SERVER_BUCKET_TSV_DEFINITION = "tsv-data/reviews/"; // data resides here @@ -145,6 +147,7 @@ public class AwsS3ExternalDatasetTest { ONLY_TESTS = "only_external_dataset.xml"; TEST_CONFIG_FILE_NAME = "src/main/resources/cc.conf"; PREPARE_S3_BUCKET = AwsS3ExternalDatasetTest::prepareS3Bucket; + PREPARE_FIXED_DATA_BUCKET = AwsS3ExternalDatasetTest::prepareFixedDataBucket; return LangExecutionUtil.tests(ONLY_TESTS, SUITE_TESTS); } @@ -187,6 +190,7 @@ public class AwsS3ExternalDatasetTest { // Create the bucket and upload some json files PREPARE_S3_BUCKET.run(); + PREPARE_FIXED_DATA_BUCKET.run(); } /** @@ -210,6 +214,26 @@ public class AwsS3ExternalDatasetTest { LOGGER.info("TSV Files added successfully"); } + /** + * This bucket is being filled by fixed data, a test is counting all records in this bucket. If this bucket is + * changed, the test case will fail and its result will need to be updated each time + */ + private static void prepareFixedDataBucket() { + LOGGER.info("creating bucket " + S3_MOCK_SERVER_FIXED_DATA_BUCKET); + client.createBucket(CreateBucketRequest.builder().bucket(S3_MOCK_SERVER_FIXED_DATA_BUCKET).build()); + LOGGER.info("bucket " + S3_MOCK_SERVER_FIXED_DATA_BUCKET + " created successfully"); + + LOGGER.info("Loading fixed data to " + S3_MOCK_SERVER_FIXED_DATA_BUCKET); + + // Files data + RequestBody requestBody = RequestBody.fromFile(Paths.get(JSON_DATA_PATH, "single-line", "20-records.json")); + client.putObject(builder.bucket(S3_MOCK_SERVER_FIXED_DATA_BUCKET).key("1.json").build(), requestBody); + client.putObject(builder.bucket(S3_MOCK_SERVER_FIXED_DATA_BUCKET).key("2.json").build(), requestBody); + client.putObject(builder.bucket(S3_MOCK_SERVER_FIXED_DATA_BUCKET).key("lvl1/3.json").build(), requestBody); + client.putObject(builder.bucket(S3_MOCK_SERVER_FIXED_DATA_BUCKET).key("lvl1/4.json").build(), requestBody); + client.putObject(builder.bucket(S3_MOCK_SERVER_FIXED_DATA_BUCKET).key("lvl1/lvl2/5.json").build(), requestBody); + } + private static void loadJsonFiles() { String dataBasePath = JSON_DATA_PATH; String definition = S3_MOCK_SERVER_BUCKET_JSON_DEFINITION; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.000.ddl.sqlpp new file mode 100644 index 0000000..115967d --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.000.ddl.sqlpp @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +drop dataverse test if exists; +create dataverse test; +use test; + +drop type test if exists; +create type test as open { +}; + +drop dataset test if exists; +create external dataset test(test) using S3 ( +("accessKey"="dummyAccessKey"), +("secretKey"="dummySecretKey"), +("region"="us-west-2"), +("serviceEndpoint"="http://localhost:8001"), +("container"="fixed-data-bucket"), +("definition"=""), +("format"="json") +); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.001.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.001.query.sqlpp new file mode 100644 index 0000000..ec40ee0 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.001.query.sqlpp @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use test; + +select count(*) `count` from test; + diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.099.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.099.ddl.sqlpp new file mode 100644 index 0000000..548e632 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.099.ddl.sqlpp @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +drop dataverse test if exists; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/empty-string-definition/external_dataset.001.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/empty-string-definition/external_dataset.001.adm new file mode 100644 index 0000000..187a8cb --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/empty-string-definition/external_dataset.001.adm @@ -0,0 +1 @@ +{ "count": 100 } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml index 02846d1..551d777 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml @@ -51,19 +51,20 @@ <compilation-unit name="aws/s3/tsv/tsv"> <output-dir compare="Text">aws/s3/tsv/tsv</output-dir> </compilation-unit> - </test-case><test-case FilePath="external-dataset"> - <compilation-unit name="aws/s3/tsv/gz"> - <output-dir compare="Text">aws/s3/tsv/gz</output-dir> - </compilation-unit> - </test-case><test-case FilePath="external-dataset"> - <compilation-unit name="aws/s3/tsv/mixed"> - <output-dir compare="Text">aws/s3/tsv/mixed</output-dir> - </compilation-unit> - </test-case> + </test-case> + <test-case FilePath="external-dataset"> + <compilation-unit name="aws/s3/tsv/gz"> + <output-dir compare="Text">aws/s3/tsv/gz</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="external-dataset"> + <compilation-unit name="aws/s3/tsv/mixed"> + <output-dir compare="Text">aws/s3/tsv/mixed</output-dir> + </compilation-unit> + </test-case> <test-case FilePath="external-dataset"> - <compilation-unit name="aws/s3/negative"> - <output-dir compare="Text">aws/s3/negative</output-dir> - <expected-error>Parameter(s) format must be specified</expected-error> + <compilation-unit name="aws/s3/empty-string-definition"> + <output-dir compare="Text">aws/s3/empty-string-definition</output-dir> </compilation-unit> </test-case> </test-group> diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java index 451a783..f0da535 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java @@ -89,7 +89,7 @@ public class AwsS3InputStreamFactory implements IInputStreamFactory { ListObjectsRequest.Builder listObjectsBuilder = ListObjectsRequest.builder().bucket(container); String path = configuration.get(AwsS3Constants.DEFINITION_FIELD_NAME); if (path != null) { - listObjectsBuilder.prefix(path + (path.endsWith("/") ? "" : "/")); + listObjectsBuilder.prefix(path + (!path.isEmpty() && !path.endsWith("/") ? "/" : "")); } ListObjectsResponse listObjectsResponse = s3Client.listObjects(listObjectsBuilder.build()); List<S3Object> s3Objects = listObjectsResponse.contents(); @@ -123,7 +123,6 @@ public class AwsS3InputStreamFactory implements IInputStreamFactory { throw AsterixException.create(ErrorCode.PROVIDER_STREAM_RECORD_READER_UNKNOWN_FORMAT, fileFormat); } - // TODO(Hussain): We will have a property that can disable checking for .gz here s3Objects.stream().filter(object -> isValidFile(object.key(), fileFormat)).forEach(filesOnly::add); return filesOnly;
