This is an automated email from the ASF dual-hosted git repository.

mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit e692f4e06342766116467bc5e811e6817b0a7eb3
Author: Hussain Towaileb <[email protected]>
AuthorDate: Wed May 6 13:07:39 2020 +0300

    [ASTERIXDB-2724][EXT] Handle passing empty defintion to external datasets
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
    - When an empty string is passed as an external dataset's definition, then
      no prefix is supplied to the AWS client (nothing to filter).
    - Added a test case for the above mentioned item.
    
    Change-Id: I500e7afb97aa076b690ef3b98ee83c8f5934a88f
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/6183
    Integration-Tests: Jenkins <[email protected]>
    Tested-by: Jenkins <[email protected]>
    Reviewed-by: Hussain Towaileb <[email protected]>
    Reviewed-by: Dmitry Lychagin <[email protected]>
    Reviewed-by: Michael Blow <[email protected]>
---
 .../aws/AwsS3ExternalDatasetOnePartitionTest.java  |  4 +++
 .../aws/AwsS3ExternalDatasetTest.java              | 24 ++++++++++++++
 .../external_dataset.000.ddl.sqlpp                 | 37 ++++++++++++++++++++++
 .../external_dataset.001.query.sqlpp               | 23 ++++++++++++++
 .../external_dataset.099.ddl.sqlpp                 | 20 ++++++++++++
 .../external_dataset.001.adm                       |  1 +
 .../runtimets/testsuite_external_dataset.xml       | 25 ++++++++-------
 .../record/reader/aws/AwsS3InputStreamFactory.java |  3 +-
 8 files changed, 123 insertions(+), 14 deletions(-)

diff --git 
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetOnePartitionTest.java
 
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetOnePartitionTest.java
index 88cd6b5..bd433f1 100644
--- 
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetOnePartitionTest.java
+++ 
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetOnePartitionTest.java
@@ -44,9 +44,13 @@ public class AwsS3ExternalDatasetOnePartitionTest extends 
AwsS3ExternalDatasetTe
         ONLY_TESTS = "only_external_dataset.xml";
         TEST_CONFIG_FILE_NAME = "src/test/resources/cc-single.conf";
         PREPARE_S3_BUCKET = 
AwsS3ExternalDatasetOnePartitionTest::prepareS3Bucket;
+        PREPARE_FIXED_DATA_BUCKET = 
AwsS3ExternalDatasetOnePartitionTest::prepareFixedDataBucket;
         return LangExecutionUtil.tests(ONLY_TESTS, SUITE_TESTS);
     }
 
     private static void prepareS3Bucket() {
     }
+
+    private static void prepareFixedDataBucket() {
+    }
 }
diff --git 
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
 
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
index b5866e9..37a3916 100644
--- 
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
+++ 
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
@@ -83,6 +83,7 @@ public class AwsS3ExternalDatasetTest {
     static String ONLY_TESTS;
     static String TEST_CONFIG_FILE_NAME;
     static Runnable PREPARE_S3_BUCKET;
+    static Runnable PREPARE_FIXED_DATA_BUCKET;
 
     // Base directory paths for data files
     private static final String JSON_DATA_PATH = joinPath("data", "json");
@@ -96,6 +97,7 @@ public class AwsS3ExternalDatasetTest {
     // Region, bucket and definitions
     private static final String S3_MOCK_SERVER_REGION = "us-west-2";
     private static final String S3_MOCK_SERVER_BUCKET = "playground";
+    private static final String S3_MOCK_SERVER_FIXED_DATA_BUCKET = 
"fixed-data-bucket"; // Do not use, has fixed data
     private static final String S3_MOCK_SERVER_BUCKET_JSON_DEFINITION = 
"json-data/reviews/"; // data resides here
     private static final String S3_MOCK_SERVER_BUCKET_CSV_DEFINITION = 
"csv-data/reviews/"; // data resides here
     private static final String S3_MOCK_SERVER_BUCKET_TSV_DEFINITION = 
"tsv-data/reviews/"; // data resides here
@@ -145,6 +147,7 @@ public class AwsS3ExternalDatasetTest {
         ONLY_TESTS = "only_external_dataset.xml";
         TEST_CONFIG_FILE_NAME = "src/main/resources/cc.conf";
         PREPARE_S3_BUCKET = AwsS3ExternalDatasetTest::prepareS3Bucket;
+        PREPARE_FIXED_DATA_BUCKET = 
AwsS3ExternalDatasetTest::prepareFixedDataBucket;
         return LangExecutionUtil.tests(ONLY_TESTS, SUITE_TESTS);
     }
 
@@ -187,6 +190,7 @@ public class AwsS3ExternalDatasetTest {
 
         // Create the bucket and upload some json files
         PREPARE_S3_BUCKET.run();
+        PREPARE_FIXED_DATA_BUCKET.run();
     }
 
     /**
@@ -210,6 +214,26 @@ public class AwsS3ExternalDatasetTest {
         LOGGER.info("TSV Files added successfully");
     }
 
+    /**
+     * This bucket is being filled by fixed data, a test is counting all 
records in this bucket. If this bucket is
+     * changed, the test case will fail and its result will need to be updated 
each time
+     */
+    private static void prepareFixedDataBucket() {
+        LOGGER.info("creating bucket " + S3_MOCK_SERVER_FIXED_DATA_BUCKET);
+        
client.createBucket(CreateBucketRequest.builder().bucket(S3_MOCK_SERVER_FIXED_DATA_BUCKET).build());
+        LOGGER.info("bucket " + S3_MOCK_SERVER_FIXED_DATA_BUCKET + " created 
successfully");
+
+        LOGGER.info("Loading fixed data to " + 
S3_MOCK_SERVER_FIXED_DATA_BUCKET);
+
+        // Files data
+        RequestBody requestBody = 
RequestBody.fromFile(Paths.get(JSON_DATA_PATH, "single-line", 
"20-records.json"));
+        
client.putObject(builder.bucket(S3_MOCK_SERVER_FIXED_DATA_BUCKET).key("1.json").build(),
 requestBody);
+        
client.putObject(builder.bucket(S3_MOCK_SERVER_FIXED_DATA_BUCKET).key("2.json").build(),
 requestBody);
+        
client.putObject(builder.bucket(S3_MOCK_SERVER_FIXED_DATA_BUCKET).key("lvl1/3.json").build(),
 requestBody);
+        
client.putObject(builder.bucket(S3_MOCK_SERVER_FIXED_DATA_BUCKET).key("lvl1/4.json").build(),
 requestBody);
+        
client.putObject(builder.bucket(S3_MOCK_SERVER_FIXED_DATA_BUCKET).key("lvl1/lvl2/5.json").build(),
 requestBody);
+    }
+
     private static void loadJsonFiles() {
         String dataBasePath = JSON_DATA_PATH;
         String definition = S3_MOCK_SERVER_BUCKET_JSON_DEFINITION;
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.000.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.000.ddl.sqlpp
new file mode 100644
index 0000000..115967d
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.000.ddl.sqlpp
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use test;
+
+drop type test if exists;
+create type test as open {
+};
+
+drop dataset test if exists;
+create external dataset test(test) using S3 (
+("accessKey"="dummyAccessKey"),
+("secretKey"="dummySecretKey"),
+("region"="us-west-2"),
+("serviceEndpoint"="http://localhost:8001";),
+("container"="fixed-data-bucket"),
+("definition"=""),
+("format"="json")
+);
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.001.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.001.query.sqlpp
new file mode 100644
index 0000000..ec40ee0
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.001.query.sqlpp
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+select count(*) `count` from test;
+
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.099.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.099.ddl.sqlpp
new file mode 100644
index 0000000..548e632
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.099.ddl.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse test if exists;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/empty-string-definition/external_dataset.001.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/empty-string-definition/external_dataset.001.adm
new file mode 100644
index 0000000..187a8cb
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/empty-string-definition/external_dataset.001.adm
@@ -0,0 +1 @@
+{ "count": 100 }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml
 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml
index 02846d1..551d777 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml
@@ -51,19 +51,20 @@
       <compilation-unit name="aws/s3/tsv/tsv">
         <output-dir compare="Text">aws/s3/tsv/tsv</output-dir>
       </compilation-unit>
-    </test-case><test-case FilePath="external-dataset">
-    <compilation-unit name="aws/s3/tsv/gz">
-      <output-dir compare="Text">aws/s3/tsv/gz</output-dir>
-    </compilation-unit>
-  </test-case><test-case FilePath="external-dataset">
-    <compilation-unit name="aws/s3/tsv/mixed">
-      <output-dir compare="Text">aws/s3/tsv/mixed</output-dir>
-    </compilation-unit>
-  </test-case>
+    </test-case>
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="aws/s3/tsv/gz">
+        <output-dir compare="Text">aws/s3/tsv/gz</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="aws/s3/tsv/mixed">
+        <output-dir compare="Text">aws/s3/tsv/mixed</output-dir>
+      </compilation-unit>
+    </test-case>
     <test-case FilePath="external-dataset">
-      <compilation-unit name="aws/s3/negative">
-        <output-dir compare="Text">aws/s3/negative</output-dir>
-        <expected-error>Parameter(s) format must be specified</expected-error>
+      <compilation-unit name="aws/s3/empty-string-definition">
+        <output-dir compare="Text">aws/s3/empty-string-definition</output-dir>
       </compilation-unit>
     </test-case>
   </test-group>
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
index 451a783..f0da535 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
@@ -89,7 +89,7 @@ public class AwsS3InputStreamFactory implements 
IInputStreamFactory {
         ListObjectsRequest.Builder listObjectsBuilder = 
ListObjectsRequest.builder().bucket(container);
         String path = configuration.get(AwsS3Constants.DEFINITION_FIELD_NAME);
         if (path != null) {
-            listObjectsBuilder.prefix(path + (path.endsWith("/") ? "" : "/"));
+            listObjectsBuilder.prefix(path + (!path.isEmpty() && 
!path.endsWith("/") ? "/" : ""));
         }
         ListObjectsResponse listObjectsResponse = 
s3Client.listObjects(listObjectsBuilder.build());
         List<S3Object> s3Objects = listObjectsResponse.contents();
@@ -123,7 +123,6 @@ public class AwsS3InputStreamFactory implements 
IInputStreamFactory {
             throw 
AsterixException.create(ErrorCode.PROVIDER_STREAM_RECORD_READER_UNKNOWN_FORMAT, 
fileFormat);
         }
 
-        // TODO(Hussain): We will have a property that can disable checking 
for .gz here
         s3Objects.stream().filter(object -> isValidFile(object.key(), 
fileFormat)).forEach(filesOnly::add);
 
         return filesOnly;

Reply via email to