This is an automated email from the ASF dual-hosted git repository.

mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit cc3fbe5ac1be020be61822d6f479ee0a240cf4a4
Author: Hussain Towaileb <[email protected]>
AuthorDate: Fri May 1 17:15:21 2020 +0300

    [ASTERIXDB-2719][EXT] Add .gz compressed test cases for CSV and TSV
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
    - Added test cases for compressed data for CSV and TSV.
    - Addressed a comment to ensure closing the S3 stream in
      case of a failure during streaming the compressed data.
    
    Change-Id: I6160ee5f5aefa4ca60e5a88c3922bde40f4c65ec
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/6123
    Integration-Tests: Jenkins <[email protected]>
    Tested-by: Jenkins <[email protected]>
    Reviewed-by: Hussain Towaileb <[email protected]>
    Reviewed-by: Dmitry Lychagin <[email protected]>
---
 .../aws/AwsS3ExternalDatasetTest.java              | 85 ++++++++++++++--------
 .../csv/{000 => csv}/query-dataset.000.ddl.sqlpp   |  2 +-
 .../000 => csv/csv}/query-dataset.002.query.sqlpp  |  0
 .../000 => csv/csv}/query-dataset.003.ddl.sqlpp    |  0
 .../s3/csv/{000 => gz}/query-dataset.000.ddl.sqlpp |  2 +-
 .../csv/{000 => gz}/query-dataset.002.query.sqlpp  |  0
 .../s3/csv/{000 => gz}/query-dataset.003.ddl.sqlpp |  0
 .../csv/{000 => mixed}/query-dataset.000.ddl.sqlpp |  2 +-
 .../mixed}/query-dataset.002.query.sqlpp           |  0
 .../000 => csv/mixed}/query-dataset.003.ddl.sqlpp  |  0
 .../s3/tsv/{000 => gz}/query-dataset.000.ddl.sqlpp |  2 +-
 .../tsv/{000 => gz}/query-dataset.002.query.sqlpp  |  0
 .../s3/tsv/{000 => gz}/query-dataset.003.ddl.sqlpp |  0
 .../tsv/{000 => mixed}/query-dataset.000.ddl.sqlpp |  2 +-
 .../{000 => mixed}/query-dataset.002.query.sqlpp   |  0
 .../tsv/{000 => mixed}/query-dataset.003.ddl.sqlpp |  0
 .../tsv/{000 => tsv}/query-dataset.000.ddl.sqlpp   |  2 +-
 .../tsv/{000 => tsv}/query-dataset.002.query.sqlpp |  0
 .../tsv/{000 => tsv}/query-dataset.003.ddl.sqlpp   |  0
 .../aws/s3/csv/000/external_dataset.001.adm        |  6 --
 .../aws/s3/csv/csv/external_dataset.001.adm        | 30 ++++++++
 .../aws/s3/csv/gz/external_dataset.001.adm         | 30 ++++++++
 .../aws/s3/csv/mixed/external_dataset.001.adm      | 60 +++++++++++++++
 .../aws/s3/tsv/000/external_dataset.001.adm        |  6 --
 .../aws/s3/tsv/gz/external_dataset.001.adm         | 30 ++++++++
 .../aws/s3/tsv/mixed/external_dataset.001.adm      | 60 +++++++++++++++
 .../aws/s3/tsv/tsv/external_dataset.001.adm        | 30 ++++++++
 .../runtimets/testsuite_external_dataset.xml       | 26 +++++--
 .../input/record/reader/aws/AwsS3InputStream.java  |  9 ++-
 29 files changed, 326 insertions(+), 58 deletions(-)

diff --git 
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
 
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
index c76a7ca..5e2d2de 100644
--- 
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
+++ 
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
@@ -208,15 +208,13 @@ public class AwsS3ExternalDatasetTest {
         LOGGER.info("Adding TSV files to the bucket");
         loadTsvFiles();
         LOGGER.info("TSV Files added successfully");
-
-        LOGGER.info("Files added successfully");
     }
 
     private static void loadJsonFiles() {
         String dataBasePath = JSON_DATA_PATH;
         String definition = S3_MOCK_SERVER_BUCKET_JSON_DEFINITION;
 
-        // Json data
+        // Normal format
         String definitionSegment = "json";
         loadData(dataBasePath, "single-line", "20-records.json", definition, 
definitionSegment, false);
         loadData(dataBasePath, "multi-lines", "20-records.json", definition, 
definitionSegment, false);
@@ -224,7 +222,7 @@ public class AwsS3ExternalDatasetTest {
         loadData(dataBasePath, "multi-lines-with-nested-objects", 
"5-records.json", definition, definitionSegment,
                 false);
 
-        // Json gz compressed data
+        // gz compressed format
         definitionSegment = "gz";
         loadGzData(dataBasePath, "single-line", "20-records.json", definition, 
definitionSegment, false);
         loadGzData(dataBasePath, "multi-lines", "20-records.json", definition, 
definitionSegment, false);
@@ -232,7 +230,7 @@ public class AwsS3ExternalDatasetTest {
         loadGzData(dataBasePath, "multi-lines-with-nested-objects", 
"5-records.json", definition, definitionSegment,
                 false);
 
-        // Mixed json and json gz compressed data
+        // Mixed normal and gz compressed format
         definitionSegment = "mixed";
         loadData(dataBasePath, "single-line", "20-records.json", definition, 
definitionSegment, false);
         loadData(dataBasePath, "multi-lines", "20-records.json", definition, 
definitionSegment, false);
@@ -246,6 +244,50 @@ public class AwsS3ExternalDatasetTest {
                 false);
     }
 
+    private static void loadCsvFiles() {
+        String dataBasePath = CSV_DATA_PATH;
+        String definition = S3_MOCK_SERVER_BUCKET_CSV_DEFINITION;
+
+        // Normal format
+        String definitionSegment = "csv";
+        loadData(dataBasePath, "", "01.csv", definition, definitionSegment, 
false);
+        loadData(dataBasePath, "", "02.csv", definition, definitionSegment, 
false);
+
+        // gz compressed format
+        definitionSegment = "gz";
+        loadGzData(dataBasePath, "", "01.csv", definition, definitionSegment, 
false);
+        loadGzData(dataBasePath, "", "02.csv", definition, definitionSegment, 
false);
+
+        // Mixed normal and gz compressed format
+        definitionSegment = "mixed";
+        loadData(dataBasePath, "", "01.csv", definition, definitionSegment, 
false);
+        loadData(dataBasePath, "", "02.csv", definition, definitionSegment, 
false);
+        loadGzData(dataBasePath, "", "01.csv", definition, definitionSegment, 
false);
+        loadGzData(dataBasePath, "", "02.csv", definition, definitionSegment, 
false);
+    }
+
+    private static void loadTsvFiles() {
+        String dataBasePath = TSV_DATA_PATH;
+        String definition = S3_MOCK_SERVER_BUCKET_TSV_DEFINITION;
+
+        // Normal format
+        String definitionSegment = "tsv";
+        loadData(dataBasePath, "", "01.tsv", definition, definitionSegment, 
false);
+        loadData(dataBasePath, "", "02.tsv", definition, definitionSegment, 
false);
+
+        // gz compressed format
+        definitionSegment = "gz";
+        loadGzData(dataBasePath, "", "01.tsv", definition, definitionSegment, 
false);
+        loadGzData(dataBasePath, "", "02.tsv", definition, definitionSegment, 
false);
+
+        // Mixed normal and gz compressed format
+        definitionSegment = "mixed";
+        loadData(dataBasePath, "", "01.tsv", definition, definitionSegment, 
false);
+        loadData(dataBasePath, "", "02.tsv", definition, definitionSegment, 
false);
+        loadGzData(dataBasePath, "", "01.tsv", definition, definitionSegment, 
false);
+        loadGzData(dataBasePath, "", "02.tsv", definition, definitionSegment, 
false);
+    }
+
     private static void loadData(String fileBasePath, String filePathSegment, 
String filename, String definition,
             String definitionSegment, boolean removeExtension) {
         // Files data
@@ -262,7 +304,9 @@ public class AwsS3ExternalDatasetTest {
         }
 
         // Files base definition
-        String basePath = definition + filePathSegment + "/" + definitionSegment + "/";
+        filePathSegment = filePathSegment.isEmpty() ? "" : filePathSegment + "/";
+        definitionSegment = definitionSegment.isEmpty() ? "" : definitionSegment + "/";
+        String basePath = definition + filePathSegment + definitionSegment;
 
         // Load the data
         client.putObject(builder.key(basePath + finalFileName).build(), 
requestBody);
@@ -297,7 +341,9 @@ public class AwsS3ExternalDatasetTest {
             finalFileName += ".gz";
 
             // Files base definition
-            String basePath = definition + filePathSegment + "/" + definitionSegment + "/";
+            filePathSegment = filePathSegment.isEmpty() ? "" : filePathSegment + "/";
+            definitionSegment = definitionSegment.isEmpty() ? "" : definitionSegment + "/";
+            String basePath = definition + filePathSegment + definitionSegment;
 
             // Load the data
             client.putObject(builder.key(basePath + finalFileName).build(), 
requestBody);
@@ -310,31 +356,6 @@ public class AwsS3ExternalDatasetTest {
         }
     }
 
-    private static void loadCsvFiles() {
-        LOGGER.info("Adding CSV files to the bucket");
-        client.putObject(
-                PutObjectRequest.builder().bucket(S3_MOCK_SERVER_BUCKET)
-                        .key(S3_MOCK_SERVER_BUCKET_CSV_DEFINITION + 
"01.csv").build(),
-                RequestBody.fromFile(Paths.get(CSV_DATA_PATH, "01.csv")));
-        client.putObject(
-                PutObjectRequest.builder().bucket(S3_MOCK_SERVER_BUCKET)
-                        .key(S3_MOCK_SERVER_BUCKET_CSV_DEFINITION + 
"2018/01.csv").build(),
-                RequestBody.fromFile(Paths.get(CSV_DATA_PATH, "02.csv")));
-    }
-
-    private static void loadTsvFiles() {
-        LOGGER.info("Adding TSV files to the bucket");
-        client.putObject(
-                PutObjectRequest.builder().bucket(S3_MOCK_SERVER_BUCKET)
-                        .key(S3_MOCK_SERVER_BUCKET_TSV_DEFINITION + 
"01.tsv").build(),
-                RequestBody.fromFile(Paths.get(TSV_DATA_PATH, "01.tsv")));
-        client.putObject(
-                PutObjectRequest.builder().bucket(S3_MOCK_SERVER_BUCKET)
-                        .key(S3_MOCK_SERVER_BUCKET_TSV_DEFINITION + 
"2018/01.tsv").build(),
-                RequestBody.fromFile(Paths.get(TSV_DATA_PATH, "02.tsv")));
-        LOGGER.info("Files added successfully");
-    }
-
     static class AwsTestExecutor extends TestExecutor {
 
         public void executeTestFile(TestCaseContext testCaseCtx, 
TestFileContext ctx, Map<String, Object> variableCtx,
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/000/query-dataset.000.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/csv/query-dataset.000.ddl.sqlpp
similarity index 96%
copy from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/000/query-dataset.000.ddl.sqlpp
copy to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/csv/query-dataset.000.ddl.sqlpp
index 15ba6a8..5929d2d 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/000/query-dataset.000.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/csv/query-dataset.000.ddl.sqlpp
@@ -31,7 +31,7 @@ CREATE EXTERNAL DATASET test(test) USING S3 (
 ("region"="us-west-2"),
 ("serviceEndpoint"="http://localhost:8001"),
 ("container"="playground"),
-("definition"="csv-data/reviews"),
+("definition"="csv-data/reviews/csv"),
 ("format"="Csv"),
 ("header"="false"),
 ("null"="")
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.002.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/csv/query-dataset.002.query.sqlpp
similarity index 100%
copy from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.002.query.sqlpp
copy to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/csv/query-dataset.002.query.sqlpp
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.003.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/csv/query-dataset.003.ddl.sqlpp
similarity index 100%
copy from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.003.ddl.sqlpp
copy to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/csv/query-dataset.003.ddl.sqlpp
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/000/query-dataset.000.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/gz/query-dataset.000.ddl.sqlpp
similarity index 96%
copy from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/000/query-dataset.000.ddl.sqlpp
copy to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/gz/query-dataset.000.ddl.sqlpp
index 15ba6a8..9683b18 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/000/query-dataset.000.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/gz/query-dataset.000.ddl.sqlpp
@@ -31,7 +31,7 @@ CREATE EXTERNAL DATASET test(test) USING S3 (
 ("region"="us-west-2"),
 ("serviceEndpoint"="http://localhost:8001"),
 ("container"="playground"),
-("definition"="csv-data/reviews"),
+("definition"="csv-data/reviews/gz"),
 ("format"="Csv"),
 ("header"="false"),
 ("null"="")
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/000/query-dataset.002.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/gz/query-dataset.002.query.sqlpp
similarity index 100%
rename from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/000/query-dataset.002.query.sqlpp
rename to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/gz/query-dataset.002.query.sqlpp
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/000/query-dataset.003.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/gz/query-dataset.003.ddl.sqlpp
similarity index 100%
rename from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/000/query-dataset.003.ddl.sqlpp
rename to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/gz/query-dataset.003.ddl.sqlpp
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/000/query-dataset.000.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/mixed/query-dataset.000.ddl.sqlpp
similarity index 96%
rename from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/000/query-dataset.000.ddl.sqlpp
rename to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/mixed/query-dataset.000.ddl.sqlpp
index 15ba6a8..7fe8fda 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/000/query-dataset.000.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/mixed/query-dataset.000.ddl.sqlpp
@@ -31,7 +31,7 @@ CREATE EXTERNAL DATASET test(test) USING S3 (
 ("region"="us-west-2"),
 ("serviceEndpoint"="http://localhost:8001"),
 ("container"="playground"),
-("definition"="csv-data/reviews"),
+("definition"="csv-data/reviews/mixed"),
 ("format"="Csv"),
 ("header"="false"),
 ("null"="")
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.002.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/mixed/query-dataset.002.query.sqlpp
similarity index 100%
copy from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.002.query.sqlpp
copy to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/mixed/query-dataset.002.query.sqlpp
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.003.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/mixed/query-dataset.003.ddl.sqlpp
similarity index 100%
copy from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.003.ddl.sqlpp
copy to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/mixed/query-dataset.003.ddl.sqlpp
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.000.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/gz/query-dataset.000.ddl.sqlpp
similarity index 96%
copy from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.000.ddl.sqlpp
copy to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/gz/query-dataset.000.ddl.sqlpp
index 3c6ad92..ac33d92 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.000.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/gz/query-dataset.000.ddl.sqlpp
@@ -31,7 +31,7 @@ CREATE EXTERNAL DATASET test(test) USING S3 (
 ("region"="us-west-2"),
 ("serviceEndpoint"="http://localhost:8001"),
 ("container"="playground"),
-("definition"="tsv-data/reviews"),
+("definition"="tsv-data/reviews/gz"),
 ("format"="TSV"),
 ("header"="False"),
 ("null"="")
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.002.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/gz/query-dataset.002.query.sqlpp
similarity index 100%
copy from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.002.query.sqlpp
copy to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/gz/query-dataset.002.query.sqlpp
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.003.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/gz/query-dataset.003.ddl.sqlpp
similarity index 100%
copy from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.003.ddl.sqlpp
copy to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/gz/query-dataset.003.ddl.sqlpp
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.000.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/mixed/query-dataset.000.ddl.sqlpp
similarity index 96%
copy from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.000.ddl.sqlpp
copy to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/mixed/query-dataset.000.ddl.sqlpp
index 3c6ad92..a392c61 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.000.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/mixed/query-dataset.000.ddl.sqlpp
@@ -31,7 +31,7 @@ CREATE EXTERNAL DATASET test(test) USING S3 (
 ("region"="us-west-2"),
 ("serviceEndpoint"="http://localhost:8001"),
 ("container"="playground"),
-("definition"="tsv-data/reviews"),
+("definition"="tsv-data/reviews/mixed"),
 ("format"="TSV"),
 ("header"="False"),
 ("null"="")
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.002.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/mixed/query-dataset.002.query.sqlpp
similarity index 100%
copy from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.002.query.sqlpp
copy to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/mixed/query-dataset.002.query.sqlpp
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.003.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/mixed/query-dataset.003.ddl.sqlpp
similarity index 100%
copy from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.003.ddl.sqlpp
copy to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/mixed/query-dataset.003.ddl.sqlpp
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.000.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/tsv/query-dataset.000.ddl.sqlpp
similarity index 96%
rename from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.000.ddl.sqlpp
rename to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/tsv/query-dataset.000.ddl.sqlpp
index 3c6ad92..f9b82d4 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.000.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/tsv/query-dataset.000.ddl.sqlpp
@@ -31,7 +31,7 @@ CREATE EXTERNAL DATASET test(test) USING S3 (
 ("region"="us-west-2"),
 ("serviceEndpoint"="http://localhost:8001"),
 ("container"="playground"),
-("definition"="tsv-data/reviews"),
+("definition"="tsv-data/reviews/tsv"),
 ("format"="TSV"),
 ("header"="False"),
 ("null"="")
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.002.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/tsv/query-dataset.002.query.sqlpp
similarity index 100%
rename from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.002.query.sqlpp
rename to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/tsv/query-dataset.002.query.sqlpp
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.003.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/tsv/query-dataset.003.ddl.sqlpp
similarity index 100%
rename from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.003.ddl.sqlpp
rename to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/tsv/query-dataset.003.ddl.sqlpp
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/csv/000/external_dataset.001.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/csv/000/external_dataset.001.adm
deleted file mode 100644
index 93d1b57..0000000
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/csv/000/external_dataset.001.adm
+++ /dev/null
@@ -1,6 +0,0 @@
-{ "id": 1, "year": null, "review": "good", "details": "recommend" }
-{ "id": 2, "year": null, "review": "bad", "details": "not recommend" }
-{ "id": 3, "year": null, "review": "good", "details": null }
-{ "id": 4, "year": 2018, "review": "good", "details": "recommend" }
-{ "id": 5, "year": 2018, "review": "", "details": "not recommend" }
-{ "id": 6, "year": 2018, "review": "good", "details": null }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/csv/csv/external_dataset.001.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/csv/csv/external_dataset.001.adm
new file mode 100644
index 0000000..4dbd63a
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/csv/csv/external_dataset.001.adm
@@ -0,0 +1,30 @@
+{ "id": 1, "year": null, "review": "good", "details": "recommend" }
+{ "id": 1, "year": null, "review": "good", "details": "recommend" }
+{ "id": 1, "year": null, "review": "good", "details": "recommend" }
+{ "id": 1, "year": null, "review": "good", "details": "recommend" }
+{ "id": 1, "year": null, "review": "good", "details": "recommend" }
+{ "id": 2, "year": null, "review": "bad", "details": "not recommend" }
+{ "id": 2, "year": null, "review": "bad", "details": "not recommend" }
+{ "id": 2, "year": null, "review": "bad", "details": "not recommend" }
+{ "id": 2, "year": null, "review": "bad", "details": "not recommend" }
+{ "id": 2, "year": null, "review": "bad", "details": "not recommend" }
+{ "id": 3, "year": null, "review": "good", "details": null }
+{ "id": 3, "year": null, "review": "good", "details": null }
+{ "id": 3, "year": null, "review": "good", "details": null }
+{ "id": 3, "year": null, "review": "good", "details": null }
+{ "id": 3, "year": null, "review": "good", "details": null }
+{ "id": 4, "year": 2018, "review": "good", "details": "recommend" }
+{ "id": 4, "year": 2018, "review": "good", "details": "recommend" }
+{ "id": 4, "year": 2018, "review": "good", "details": "recommend" }
+{ "id": 4, "year": 2018, "review": "good", "details": "recommend" }
+{ "id": 4, "year": 2018, "review": "good", "details": "recommend" }
+{ "id": 5, "year": 2018, "review": "", "details": "not recommend" }
+{ "id": 5, "year": 2018, "review": "", "details": "not recommend" }
+{ "id": 5, "year": 2018, "review": "", "details": "not recommend" }
+{ "id": 5, "year": 2018, "review": "", "details": "not recommend" }
+{ "id": 5, "year": 2018, "review": "", "details": "not recommend" }
+{ "id": 6, "year": 2018, "review": "good", "details": null }
+{ "id": 6, "year": 2018, "review": "good", "details": null }
+{ "id": 6, "year": 2018, "review": "good", "details": null }
+{ "id": 6, "year": 2018, "review": "good", "details": null }
+{ "id": 6, "year": 2018, "review": "good", "details": null }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/csv/gz/external_dataset.001.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/csv/gz/external_dataset.001.adm
new file mode 100644
index 0000000..4dbd63a
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/csv/gz/external_dataset.001.adm
@@ -0,0 +1,30 @@
+{ "id": 1, "year": null, "review": "good", "details": "recommend" }
+{ "id": 1, "year": null, "review": "good", "details": "recommend" }
+{ "id": 1, "year": null, "review": "good", "details": "recommend" }
+{ "id": 1, "year": null, "review": "good", "details": "recommend" }
+{ "id": 1, "year": null, "review": "good", "details": "recommend" }
+{ "id": 2, "year": null, "review": "bad", "details": "not recommend" }
+{ "id": 2, "year": null, "review": "bad", "details": "not recommend" }
+{ "id": 2, "year": null, "review": "bad", "details": "not recommend" }
+{ "id": 2, "year": null, "review": "bad", "details": "not recommend" }
+{ "id": 2, "year": null, "review": "bad", "details": "not recommend" }
+{ "id": 3, "year": null, "review": "good", "details": null }
+{ "id": 3, "year": null, "review": "good", "details": null }
+{ "id": 3, "year": null, "review": "good", "details": null }
+{ "id": 3, "year": null, "review": "good", "details": null }
+{ "id": 3, "year": null, "review": "good", "details": null }
+{ "id": 4, "year": 2018, "review": "good", "details": "recommend" }
+{ "id": 4, "year": 2018, "review": "good", "details": "recommend" }
+{ "id": 4, "year": 2018, "review": "good", "details": "recommend" }
+{ "id": 4, "year": 2018, "review": "good", "details": "recommend" }
+{ "id": 4, "year": 2018, "review": "good", "details": "recommend" }
+{ "id": 5, "year": 2018, "review": "", "details": "not recommend" }
+{ "id": 5, "year": 2018, "review": "", "details": "not recommend" }
+{ "id": 5, "year": 2018, "review": "", "details": "not recommend" }
+{ "id": 5, "year": 2018, "review": "", "details": "not recommend" }
+{ "id": 5, "year": 2018, "review": "", "details": "not recommend" }
+{ "id": 6, "year": 2018, "review": "good", "details": null }
+{ "id": 6, "year": 2018, "review": "good", "details": null }
+{ "id": 6, "year": 2018, "review": "good", "details": null }
+{ "id": 6, "year": 2018, "review": "good", "details": null }
+{ "id": 6, "year": 2018, "review": "good", "details": null }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/csv/mixed/external_dataset.001.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/csv/mixed/external_dataset.001.adm
new file mode 100644
index 0000000..7aec747
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/csv/mixed/external_dataset.001.adm
@@ -0,0 +1,60 @@
+{ "id": 1, "year": null, "review": "good", "details": "recommend" }
+{ "id": 1, "year": null, "review": "good", "details": "recommend" }
+{ "id": 1, "year": null, "review": "good", "details": "recommend" }
+{ "id": 1, "year": null, "review": "good", "details": "recommend" }
+{ "id": 1, "year": null, "review": "good", "details": "recommend" }
+{ "id": 1, "year": null, "review": "good", "details": "recommend" }
+{ "id": 1, "year": null, "review": "good", "details": "recommend" }
+{ "id": 1, "year": null, "review": "good", "details": "recommend" }
+{ "id": 1, "year": null, "review": "good", "details": "recommend" }
+{ "id": 1, "year": null, "review": "good", "details": "recommend" }
+{ "id": 2, "year": null, "review": "bad", "details": "not recommend" }
+{ "id": 2, "year": null, "review": "bad", "details": "not recommend" }
+{ "id": 2, "year": null, "review": "bad", "details": "not recommend" }
+{ "id": 2, "year": null, "review": "bad", "details": "not recommend" }
+{ "id": 2, "year": null, "review": "bad", "details": "not recommend" }
+{ "id": 2, "year": null, "review": "bad", "details": "not recommend" }
+{ "id": 2, "year": null, "review": "bad", "details": "not recommend" }
+{ "id": 2, "year": null, "review": "bad", "details": "not recommend" }
+{ "id": 2, "year": null, "review": "bad", "details": "not recommend" }
+{ "id": 2, "year": null, "review": "bad", "details": "not recommend" }
+{ "id": 3, "year": null, "review": "good", "details": null }
+{ "id": 3, "year": null, "review": "good", "details": null }
+{ "id": 3, "year": null, "review": "good", "details": null }
+{ "id": 3, "year": null, "review": "good", "details": null }
+{ "id": 3, "year": null, "review": "good", "details": null }
+{ "id": 3, "year": null, "review": "good", "details": null }
+{ "id": 3, "year": null, "review": "good", "details": null }
+{ "id": 3, "year": null, "review": "good", "details": null }
+{ "id": 3, "year": null, "review": "good", "details": null }
+{ "id": 3, "year": null, "review": "good", "details": null }
+{ "id": 4, "year": 2018, "review": "good", "details": "recommend" }
+{ "id": 4, "year": 2018, "review": "good", "details": "recommend" }
+{ "id": 4, "year": 2018, "review": "good", "details": "recommend" }
+{ "id": 4, "year": 2018, "review": "good", "details": "recommend" }
+{ "id": 4, "year": 2018, "review": "good", "details": "recommend" }
+{ "id": 4, "year": 2018, "review": "good", "details": "recommend" }
+{ "id": 4, "year": 2018, "review": "good", "details": "recommend" }
+{ "id": 4, "year": 2018, "review": "good", "details": "recommend" }
+{ "id": 4, "year": 2018, "review": "good", "details": "recommend" }
+{ "id": 4, "year": 2018, "review": "good", "details": "recommend" }
+{ "id": 5, "year": 2018, "review": "", "details": "not recommend" }
+{ "id": 5, "year": 2018, "review": "", "details": "not recommend" }
+{ "id": 5, "year": 2018, "review": "", "details": "not recommend" }
+{ "id": 5, "year": 2018, "review": "", "details": "not recommend" }
+{ "id": 5, "year": 2018, "review": "", "details": "not recommend" }
+{ "id": 5, "year": 2018, "review": "", "details": "not recommend" }
+{ "id": 5, "year": 2018, "review": "", "details": "not recommend" }
+{ "id": 5, "year": 2018, "review": "", "details": "not recommend" }
+{ "id": 5, "year": 2018, "review": "", "details": "not recommend" }
+{ "id": 5, "year": 2018, "review": "", "details": "not recommend" }
+{ "id": 6, "year": 2018, "review": "good", "details": null }
+{ "id": 6, "year": 2018, "review": "good", "details": null }
+{ "id": 6, "year": 2018, "review": "good", "details": null }
+{ "id": 6, "year": 2018, "review": "good", "details": null }
+{ "id": 6, "year": 2018, "review": "good", "details": null }
+{ "id": 6, "year": 2018, "review": "good", "details": null }
+{ "id": 6, "year": 2018, "review": "good", "details": null }
+{ "id": 6, "year": 2018, "review": "good", "details": null }
+{ "id": 6, "year": 2018, "review": "good", "details": null }
+{ "id": 6, "year": 2018, "review": "good", "details": null }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/tsv/000/external_dataset.001.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/tsv/000/external_dataset.001.adm
deleted file mode 100644
index 1954b05..0000000
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/tsv/000/external_dataset.001.adm
+++ /dev/null
@@ -1,6 +0,0 @@
-{ "id": 1, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
-{ "id": 2, "year": null, "review": "\"bad\"", "details": "\"not recommend\"" }
-{ "id": 3, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
-{ "id": 4, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
-{ "id": 5, "year": 2018, "review": "", "details": "\"not recommend\"" }
-{ "id": 6, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/tsv/gz/external_dataset.001.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/tsv/gz/external_dataset.001.adm
new file mode 100644
index 0000000..e04a6b5
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/tsv/gz/external_dataset.001.adm
@@ -0,0 +1,30 @@
+{ "id": 1, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 1, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 1, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 1, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 1, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 2, "year": null, "review": "\"bad\"", "details": "\"not recommend\"" }
+{ "id": 2, "year": null, "review": "\"bad\"", "details": "\"not recommend\"" }
+{ "id": 2, "year": null, "review": "\"bad\"", "details": "\"not recommend\"" }
+{ "id": 2, "year": null, "review": "\"bad\"", "details": "\"not recommend\"" }
+{ "id": 2, "year": null, "review": "\"bad\"", "details": "\"not recommend\"" }
+{ "id": 3, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 3, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 3, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 3, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 3, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 4, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 4, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 4, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 4, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 4, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 5, "year": 2018, "review": "", "details": "\"not recommend\"" }
+{ "id": 5, "year": 2018, "review": "", "details": "\"not recommend\"" }
+{ "id": 5, "year": 2018, "review": "", "details": "\"not recommend\"" }
+{ "id": 5, "year": 2018, "review": "", "details": "\"not recommend\"" }
+{ "id": 5, "year": 2018, "review": "", "details": "\"not recommend\"" }
+{ "id": 6, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 6, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 6, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 6, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 6, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/tsv/mixed/external_dataset.001.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/tsv/mixed/external_dataset.001.adm
new file mode 100644
index 0000000..4823286
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/tsv/mixed/external_dataset.001.adm
@@ -0,0 +1,60 @@
+{ "id": 1, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 1, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 1, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 1, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 1, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 1, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 1, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 1, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 1, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 1, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 2, "year": null, "review": "\"bad\"", "details": "\"not recommend\"" }
+{ "id": 2, "year": null, "review": "\"bad\"", "details": "\"not recommend\"" }
+{ "id": 2, "year": null, "review": "\"bad\"", "details": "\"not recommend\"" }
+{ "id": 2, "year": null, "review": "\"bad\"", "details": "\"not recommend\"" }
+{ "id": 2, "year": null, "review": "\"bad\"", "details": "\"not recommend\"" }
+{ "id": 2, "year": null, "review": "\"bad\"", "details": "\"not recommend\"" }
+{ "id": 2, "year": null, "review": "\"bad\"", "details": "\"not recommend\"" }
+{ "id": 2, "year": null, "review": "\"bad\"", "details": "\"not recommend\"" }
+{ "id": 2, "year": null, "review": "\"bad\"", "details": "\"not recommend\"" }
+{ "id": 2, "year": null, "review": "\"bad\"", "details": "\"not recommend\"" }
+{ "id": 3, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 3, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 3, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 3, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 3, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 3, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 3, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 3, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 3, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 3, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 4, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 4, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 4, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 4, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 4, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 4, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 4, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 4, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 4, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 4, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 5, "year": 2018, "review": "", "details": "\"not recommend\"" }
+{ "id": 5, "year": 2018, "review": "", "details": "\"not recommend\"" }
+{ "id": 5, "year": 2018, "review": "", "details": "\"not recommend\"" }
+{ "id": 5, "year": 2018, "review": "", "details": "\"not recommend\"" }
+{ "id": 5, "year": 2018, "review": "", "details": "\"not recommend\"" }
+{ "id": 5, "year": 2018, "review": "", "details": "\"not recommend\"" }
+{ "id": 5, "year": 2018, "review": "", "details": "\"not recommend\"" }
+{ "id": 5, "year": 2018, "review": "", "details": "\"not recommend\"" }
+{ "id": 5, "year": 2018, "review": "", "details": "\"not recommend\"" }
+{ "id": 5, "year": 2018, "review": "", "details": "\"not recommend\"" }
+{ "id": 6, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 6, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 6, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 6, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 6, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 6, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 6, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 6, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 6, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 6, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/tsv/tsv/external_dataset.001.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/tsv/tsv/external_dataset.001.adm
new file mode 100644
index 0000000..e04a6b5
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/tsv/tsv/external_dataset.001.adm
@@ -0,0 +1,30 @@
+{ "id": 1, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 1, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 1, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 1, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 1, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 2, "year": null, "review": "\"bad\"", "details": "\"not recommend\"" }
+{ "id": 2, "year": null, "review": "\"bad\"", "details": "\"not recommend\"" }
+{ "id": 2, "year": null, "review": "\"bad\"", "details": "\"not recommend\"" }
+{ "id": 2, "year": null, "review": "\"bad\"", "details": "\"not recommend\"" }
+{ "id": 2, "year": null, "review": "\"bad\"", "details": "\"not recommend\"" }
+{ "id": 3, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 3, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 3, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 3, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 3, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 4, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 4, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 4, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 4, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 4, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 5, "year": 2018, "review": "", "details": "\"not recommend\"" }
+{ "id": 5, "year": 2018, "review": "", "details": "\"not recommend\"" }
+{ "id": 5, "year": 2018, "review": "", "details": "\"not recommend\"" }
+{ "id": 5, "year": 2018, "review": "", "details": "\"not recommend\"" }
+{ "id": 5, "year": 2018, "review": "", "details": "\"not recommend\"" }
+{ "id": 6, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 6, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 6, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 6, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 6, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml
 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml
index 007f194..02846d1 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml
@@ -35,15 +35,31 @@
       </compilation-unit>
     </test-case>
     <test-case FilePath="external-dataset">
-      <compilation-unit name="aws/s3/csv/000">
-        <output-dir compare="Text">aws/s3/csv/000</output-dir>
+      <compilation-unit name="aws/s3/csv/csv">
+        <output-dir compare="Text">aws/s3/csv/csv</output-dir>
+      </compilation-unit>
+    </test-case><test-case FilePath="external-dataset">
+      <compilation-unit name="aws/s3/csv/gz">
+        <output-dir compare="Text">aws/s3/csv/gz</output-dir>
+      </compilation-unit>
+    </test-case><test-case FilePath="external-dataset">
+      <compilation-unit name="aws/s3/csv/mixed">
+        <output-dir compare="Text">aws/s3/csv/mixed</output-dir>
       </compilation-unit>
     </test-case>
     <test-case FilePath="external-dataset">
-      <compilation-unit name="aws/s3/tsv/000">
-        <output-dir compare="Text">aws/s3/tsv/000</output-dir>
+      <compilation-unit name="aws/s3/tsv/tsv">
+        <output-dir compare="Text">aws/s3/tsv/tsv</output-dir>
       </compilation-unit>
-    </test-case>
+    </test-case><test-case FilePath="external-dataset">
+    <compilation-unit name="aws/s3/tsv/gz">
+      <output-dir compare="Text">aws/s3/tsv/gz</output-dir>
+    </compilation-unit>
+  </test-case><test-case FilePath="external-dataset">
+    <compilation-unit name="aws/s3/tsv/mixed">
+      <output-dir compare="Text">aws/s3/tsv/mixed</output-dir>
+    </compilation-unit>
+  </test-case>
     <test-case FilePath="external-dataset">
       <compilation-unit name="aws/s3/negative">
         <output-dir compare="Text">aws/s3/negative</output-dir>
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStream.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStream.java
index 315327f..d725687 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStream.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStream.java
@@ -78,13 +78,16 @@ public class AwsS3InputStream extends 
AbstractMultipleInputStream {
         GetObjectRequest.Builder getObjectBuilder = GetObjectRequest.builder();
         GetObjectRequest getObjectRequest = 
getObjectBuilder.bucket(bucket).key(filePaths.get(nextFileIndex)).build();
 
-        // Use the proper input stream
+        // Keep a reference to the raw S3 stream so that, if GZIPInputStream 
throws an IOException while reading
+        // the gzip header, the S3 stream still gets closed in the close method
+        in = s3Client.getObject(getObjectRequest);
+
+        // Wrap the S3 stream opened above in a gzip stream if needed (do not fetch the object a second time)
         String filename = filePaths.get(nextFileIndex).toLowerCase();
         if (filename.endsWith(".gz") || filename.endsWith(".gzip")) {
-            in = new GZIPInputStream(s3Client.getObject(getObjectRequest), 
ExternalDataConstants.DEFAULT_BUFFER_SIZE);
+            in = new GZIPInputStream(in, ExternalDataConstants.DEFAULT_BUFFER_SIZE);
-        } else {
-            in = s3Client.getObject(getObjectRequest);
         }
+
         if (notificationHandler != null) {
             notificationHandler.notifyNewSource();
         }

Reply via email to